From 9d389d7f84bbb3a294eb05f7dfe2076e291fc150 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 14 Apr 2017 10:05:44 +0200 Subject: xfrm: Add a xfrm type offload. We add a struct xfrm_type_offload so that we have the offloaded codepath separated to the non offloaded codepath. With this the non offloade and the offloaded codepath can coexist. Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_state.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) (limited to 'net/xfrm') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 5a597dbbe564..47fefe97d1e3 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -251,6 +251,75 @@ static void xfrm_put_type(const struct xfrm_type *type) module_put(type->owner); } +static DEFINE_SPINLOCK(xfrm_type_offload_lock); +int xfrm_register_type_offload(const struct xfrm_type_offload *type, + unsigned short family) +{ + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + const struct xfrm_type_offload **typemap; + int err = 0; + + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + typemap = afinfo->type_offload_map; + spin_lock_bh(&xfrm_type_offload_lock); + + if (likely(typemap[type->proto] == NULL)) + typemap[type->proto] = type; + else + err = -EEXIST; + spin_unlock_bh(&xfrm_type_offload_lock); + rcu_read_unlock(); + return err; +} +EXPORT_SYMBOL(xfrm_register_type_offload); + +int xfrm_unregister_type_offload(const struct xfrm_type_offload *type, + unsigned short family) +{ + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + const struct xfrm_type_offload **typemap; + int err = 0; + + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; + typemap = afinfo->type_offload_map; + spin_lock_bh(&xfrm_type_offload_lock); + + if (unlikely(typemap[type->proto] != type)) + err = -ENOENT; + else + typemap[type->proto] = NULL; + spin_unlock_bh(&xfrm_type_offload_lock); + rcu_read_unlock(); + return err; +} +EXPORT_SYMBOL(xfrm_unregister_type_offload); + +static const struct xfrm_type_offload *xfrm_get_type_offload(u8 proto, unsigned short family) +{ + struct xfrm_state_afinfo *afinfo; + const struct xfrm_type_offload **typemap; + const struct xfrm_type_offload *type; + + afinfo = xfrm_state_get_afinfo(family); + if (unlikely(afinfo == NULL)) + return NULL; + typemap = afinfo->type_offload_map; + + type = typemap[proto]; + if ((type && !try_module_get(type->owner))) + type = NULL; + + rcu_read_unlock(); + return type; +} + +static void xfrm_put_type_offload(const struct xfrm_type_offload *type) +{ + module_put(type->owner); +} + static DEFINE_SPINLOCK(xfrm_mode_lock); int xfrm_register_mode(struct xfrm_mode *mode, int family) { @@ -365,6 +434,8 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) xfrm_put_mode(x->inner_mode_iaf); if (x->outer_mode) xfrm_put_mode(x->outer_mode); + if (x->type_offload) + xfrm_put_type_offload(x->type_offload); if (x->type) { x->type->destructor(x); xfrm_put_type(x->type); @@ -2077,6 +2148,8 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay) if (x->type == NULL) goto error; + x->type_offload = xfrm_get_type_offload(x->id.proto, family); + err = x->type->init_state(x); if (err) goto error; -- cgit v1.2.3-59-g8ed1b From 21f42cc95f07c1d7827b339c04442e147411e44b Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 14 Apr 2017 10:05:53 +0200 Subject: xfrm: Move device notifications to a sepatate file This is needed for the upcomming IPsec device offloading. Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 1 + net/xfrm/Makefile | 2 +- net/xfrm/xfrm_device.c | 43 +++++++++++++++++++++++++++++++++++++++++++ net/xfrm/xfrm_policy.c | 17 +---------------- 4 files changed, 46 insertions(+), 17 deletions(-) create mode 100644 net/xfrm/xfrm_device.c (limited to 'net/xfrm') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 159342f3e72b..ac984da27879 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1443,6 +1443,7 @@ struct xfrm6_tunnel { void xfrm_init(void); void xfrm4_init(void); int xfrm_state_init(struct net *net); +void xfrm_dev_init(void); void xfrm_state_fini(struct net *net); void xfrm4_state_init(void); void xfrm4_protocol_init(void); diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index c0e961983f17..55b2ac300995 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ xfrm_input.o xfrm_output.o \ - xfrm_sysctl.o xfrm_replay.o + xfrm_sysctl.o xfrm_replay.o xfrm_device.o obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c new file mode 100644 index 000000000000..34a260a61be9 --- /dev/null +++ b/net/xfrm/xfrm_device.c @@ -0,0 +1,43 @@ +/* + * xfrm_device.c - IPsec device offloading code. + * + * Copyright (c) 2015 secunet Security Networks AG + * + * Author: + * Steffen Klassert + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + switch (event) { + case NETDEV_DOWN: + xfrm_garbage_collect(dev_net(dev)); + } + return NOTIFY_DONE; +} + +static struct notifier_block xfrm_dev_notifier = { + .notifier_call = xfrm_dev_event, +}; + +void __net_init xfrm_dev_init(void) +{ + register_netdevice_notifier(&xfrm_dev_notifier); +} diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 236cbbc0ab9c..7befca2a0773 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2929,21 +2929,6 @@ void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_policy_unregister_afinfo); -static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - - switch (event) { - case NETDEV_DOWN: - xfrm_garbage_collect(dev_net(dev)); - } - return NOTIFY_DONE; -} - -static struct notifier_block xfrm_dev_notifier = { - .notifier_call = xfrm_dev_event, -}; - #ifdef CONFIG_XFRM_STATISTICS static int __net_init xfrm_statistics_init(struct net *net) { @@ -3020,7 +3005,7 @@ static int __net_init xfrm_policy_init(struct net *net) INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize); INIT_WORK(&net->xfrm.policy_hthresh.work, xfrm_hash_rebuild); if (net_eq(net, &init_net)) - register_netdevice_notifier(&xfrm_dev_notifier); + xfrm_dev_init(); return 0; out_bydst: -- cgit v1.2.3-59-g8ed1b From d77e38e612a017480157fe6d2c1422f42cb5b7e3 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 14 Apr 2017 10:06:10 +0200 Subject: xfrm: Add an IPsec hardware offloading API This patch adds all the bits that are needed to do IPsec hardware offload for IPsec states and ESP packets. We add xfrmdev_ops to the net_device. xfrmdev_ops has function pointers that are needed to manage the xfrm states in the hardware and to do a per packet offloading decision. Joint work with: Ilan Tayari Guy Shapiro Yossi Kuperman Signed-off-by: Guy Shapiro Signed-off-by: Ilan Tayari Signed-off-by: Yossi Kuperman Signed-off-by: Steffen Klassert --- include/linux/netdevice.h | 14 +++++ include/net/xfrm.h | 65 +++++++++++++++++++++- include/uapi/linux/xfrm.h | 8 +++ net/ipv4/esp4.c | 7 +-- net/ipv4/xfrm4_output.c | 3 +- net/ipv6/esp6.c | 4 +- net/ipv6/xfrm6_output.c | 9 ++- net/xfrm/Makefile | 3 +- net/xfrm/xfrm_device.c | 138 +++++++++++++++++++++++++++++++++++++++++++++- net/xfrm/xfrm_input.c | 41 +++++++++++++- net/xfrm/xfrm_output.c | 44 +++++++++++++-- net/xfrm/xfrm_policy.c | 10 ++-- net/xfrm/xfrm_state.c | 74 +++++++++++++++++++++++++ net/xfrm/xfrm_user.c | 28 ++++++++++ 14 files changed, 424 insertions(+), 24 deletions(-) (limited to 'net/xfrm') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5bb03d181848..b3eb83db0223 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -824,6 +824,16 @@ struct netdev_xdp { }; }; +#ifdef CONFIG_XFRM_OFFLOAD +struct xfrmdev_ops { + int (*xdo_dev_state_add) (struct xfrm_state *x); + void (*xdo_dev_state_delete) (struct xfrm_state *x); + void (*xdo_dev_state_free) (struct xfrm_state *x); + bool (*xdo_dev_offload_ok) (struct sk_buff *skb, + struct xfrm_state *x); +}; +#endif + /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are @@ -1697,6 +1707,10 @@ struct net_device { const struct ndisc_ops *ndisc_ops; #endif +#ifdef CONFIG_XFRM + const struct xfrmdev_ops *xfrmdev_ops; +#endif + const struct header_ops *header_ops; unsigned int flags; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 54515d989365..17603bf190c1 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -120,6 +120,13 @@ struct xfrm_state_walk { struct xfrm_address_filter *filter; }; +struct xfrm_state_offload { + struct net_device *dev; + unsigned long offload_handle; + unsigned int num_exthdrs; + u8 flags; +}; + /* Full description of state of transformer. */ struct xfrm_state { possible_net_t xs_net; @@ -207,6 +214,8 @@ struct xfrm_state { struct xfrm_lifetime_cur curlft; struct tasklet_hrtimer mtimer; + struct xfrm_state_offload xso; + /* used to fix curlft->add_time when changing date */ long saved_tmo; @@ -1453,7 +1462,6 @@ struct xfrm6_tunnel { void xfrm_init(void); void xfrm4_init(void); int xfrm_state_init(struct net *net); -void xfrm_dev_init(void); void xfrm_state_fini(struct net *net); void xfrm4_state_init(void); void xfrm4_protocol_init(void); @@ -1559,6 +1567,7 @@ struct xfrmk_spdinfo { struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq); int xfrm_state_delete(struct xfrm_state *x); int xfrm_state_flush(struct net *net, u8 proto, bool task_valid); +int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid); void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si); void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si); u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq); @@ -1641,6 +1650,11 @@ static inline int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) } #endif +struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif, + const xfrm_address_t *saddr, + const xfrm_address_t *daddr, + int family); + struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp); void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type); @@ -1846,6 +1860,55 @@ static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb) } #endif +#ifdef CONFIG_XFRM_OFFLOAD +void __net_init xfrm_dev_init(void); +int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, + struct xfrm_user_offload *xuo); +bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x); + +static inline void xfrm_dev_state_delete(struct xfrm_state *x) +{ + struct xfrm_state_offload *xso = &x->xso; + + if (xso->dev) + xso->dev->xfrmdev_ops->xdo_dev_state_delete(x); +} + +static inline void xfrm_dev_state_free(struct xfrm_state *x) +{ + struct xfrm_state_offload *xso = &x->xso; + struct net_device *dev = xso->dev; + + if (dev && dev->xfrmdev_ops) { + dev->xfrmdev_ops->xdo_dev_state_free(x); + xso->dev = NULL; + dev_put(dev); + } +} +#else +static inline void __net_init xfrm_dev_init(void) +{ +} + +static inline int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo) +{ + return 0; +} + +static inline void xfrm_dev_state_delete(struct xfrm_state *x) +{ +} + +static inline void xfrm_dev_state_free(struct xfrm_state *x) +{ +} + +static inline bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x) +{ + return false; +} +#endif + static inline int xfrm_mark_get(struct nlattr **attrs, struct xfrm_mark *m) { if (attrs[XFRMA_MARK]) diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 1fc62b239f1b..2b384ff09fa0 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -303,6 +303,7 @@ enum xfrm_attr_type_t { XFRMA_PROTO, /* __u8 */ XFRMA_ADDRESS_FILTER, /* struct xfrm_address_filter */ XFRMA_PAD, + XFRMA_OFFLOAD_DEV, /* struct xfrm_state_offload */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) @@ -494,6 +495,13 @@ struct xfrm_address_filter { __u8 dplen; }; +struct xfrm_user_offload { + int ifindex; + __u8 flags; +}; +#define XFRM_OFFLOAD_IPV6 1 +#define XFRM_OFFLOAD_INBOUND 2 + #ifndef __KERNEL__ /* backwards compatibility for userspace */ #define XFRMGRP_ACQUIRE 1 diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index b1e24446e297..c6aba234b6e9 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -435,9 +435,6 @@ skip_cow2: aead_request_set_crypt(req, sg, dsg, ivlen + clen, iv); aead_request_set_ad(req, assoclen); - seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low + - ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32)); - memset(iv, 0, ivlen); memcpy(iv + ivlen - min(ivlen, 8), (u8 *)&seqno + 8 - min(ivlen, 8), min(ivlen, 8)); @@ -470,6 +467,7 @@ static int esp_input_done2(struct sk_buff *skb, int err) { const struct iphdr *iph; struct xfrm_state *x = xfrm_input_state(skb); + struct xfrm_offload *xo = xfrm_offload(skb); struct crypto_aead *aead = x->data; int alen = crypto_aead_authsize(aead); int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); @@ -478,7 +476,8 @@ static int esp_input_done2(struct sk_buff *skb, int err) u8 nexthdr[2]; int padlen; - kfree(ESP_SKB_CB(skb)->tmp); + if (!xo || (xo && !(xo->flags & CRYPTO_DONE))) + kfree(ESP_SKB_CB(skb)->tmp); if (unlikely(err)) goto out; diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 7ee6518afa86..94b8702603bc 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -29,7 +29,8 @@ static int xfrm4_tunnel_check_size(struct sk_buff *skb) goto out; mtu = dst_mtu(skb_dst(skb)); - if (skb->len > mtu) { + if ((!skb_is_gso(skb) && skb->len > mtu) || + (skb_is_gso(skb) && skb_gso_network_seglen(skb) > ip_skb_dst_mtu(skb->sk, skb))) { skb->protocol = htons(ETH_P_IP); if (skb->sk) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index ff54faa75631..3d3757d20d0a 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -450,6 +450,7 @@ error: static int esp_input_done2(struct sk_buff *skb, int err) { struct xfrm_state *x = xfrm_input_state(skb); + struct xfrm_offload *xo = xfrm_offload(skb); struct crypto_aead *aead = x->data; int alen = crypto_aead_authsize(aead); int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); @@ -458,7 +459,8 @@ static int esp_input_done2(struct sk_buff *skb, int err) int padlen; u8 nexthdr[2]; - kfree(ESP_SKB_CB(skb)->tmp); + if (!xo || (xo && !(xo->flags & CRYPTO_DONE))) + kfree(ESP_SKB_CB(skb)->tmp); if (unlikely(err)) goto out; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 4d09ce6fa90e..8ae87d4ec5ff 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -73,11 +73,16 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) int mtu, ret = 0; struct dst_entry *dst = skb_dst(skb); + if (skb->ignore_df) + goto out; + mtu = dst_mtu(dst); if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - if (!skb->ignore_df && skb->len > mtu) { + if ((!skb_is_gso(skb) && skb->len > mtu) || + (skb_is_gso(skb) && + skb_gso_network_seglen(skb) > ip6_skb_dst_mtu(skb))) { skb->dev = dst->dev; skb->protocol = htons(ETH_P_IPV6); @@ -89,7 +94,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ret = -EMSGSIZE; } - +out: return ret; } diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 55b2ac300995..abf81b329dc1 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -4,7 +4,8 @@ obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ xfrm_input.o xfrm_output.o \ - xfrm_sysctl.o xfrm_replay.o xfrm_device.o + xfrm_sysctl.o xfrm_replay.o +obj-$(CONFIG_XFRM_OFFLOAD) += xfrm_device.o obj-$(CONFIG_XFRM_STATISTICS) += xfrm_proc.o obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 34a260a61be9..9bac2ba9052c 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -22,13 +22,149 @@ #include #include +int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, + struct xfrm_user_offload *xuo) +{ + int err; + struct dst_entry *dst; + struct net_device *dev; + struct xfrm_state_offload *xso = &x->xso; + xfrm_address_t *saddr; + xfrm_address_t *daddr; + + if (!x->type_offload) + return 0; + + /* We don't yet support UDP encapsulation, TFC padding and ESN. */ + if (x->encap || x->tfcpad || (x->props.flags & XFRM_STATE_ESN)) + return 0; + + dev = dev_get_by_index(net, xuo->ifindex); + if (!dev) { + if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) { + saddr = &x->props.saddr; + daddr = &x->id.daddr; + } else { + saddr = &x->id.daddr; + daddr = &x->props.saddr; + } + + dst = __xfrm_dst_lookup(net, 0, 0, saddr, daddr, x->props.family); + if (IS_ERR(dst)) + return 0; + + dev = dst->dev; + + dev_hold(dev); + dst_release(dst); + } + + if (!dev->xfrmdev_ops || !dev->xfrmdev_ops->xdo_dev_state_add) { + dev_put(dev); + return 0; + } + + xso->dev = dev; + xso->num_exthdrs = 1; + xso->flags = xuo->flags; + + err = dev->xfrmdev_ops->xdo_dev_state_add(x); + if (err) { + dev_put(dev); + return err; + } + + return 0; +} +EXPORT_SYMBOL_GPL(xfrm_dev_state_add); + +bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x) +{ + int mtu; + struct dst_entry *dst = skb_dst(skb); + struct xfrm_dst *xdst = (struct xfrm_dst *)dst; + struct net_device *dev = x->xso.dev; + + if (!x->type_offload || x->encap) + return false; + + if ((x->xso.offload_handle && (dev == dst->path->dev)) && + !dst->child->xfrm && x->type->get_mtu) { + mtu = x->type->get_mtu(x, xdst->child_mtu_cached); + + if (skb->len <= mtu) + goto ok; + + if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu)) + goto ok; + } + + return false; + +ok: + if (dev && dev->xfrmdev_ops && dev->xfrmdev_ops->xdo_dev_offload_ok) + return x->xso.dev->xfrmdev_ops->xdo_dev_offload_ok(skb, x); + + return true; +} +EXPORT_SYMBOL_GPL(xfrm_dev_offload_ok); + +int xfrm_dev_register(struct net_device *dev) +{ + if ((dev->features & NETIF_F_HW_ESP) && !dev->xfrmdev_ops) + return NOTIFY_BAD; + if ((dev->features & NETIF_F_HW_ESP_TX_CSUM) && + !(dev->features & NETIF_F_HW_ESP)) + return NOTIFY_BAD; + + return NOTIFY_DONE; +} + +static int xfrm_dev_unregister(struct net_device *dev) +{ + return NOTIFY_DONE; +} + +static int xfrm_dev_feat_change(struct net_device *dev) +{ + if ((dev->features & NETIF_F_HW_ESP) && !dev->xfrmdev_ops) + return NOTIFY_BAD; + else if (!(dev->features & NETIF_F_HW_ESP)) + dev->xfrmdev_ops = NULL; + + if ((dev->features & NETIF_F_HW_ESP_TX_CSUM) && + !(dev->features & NETIF_F_HW_ESP)) + return NOTIFY_BAD; + + return NOTIFY_DONE; +} + +static int xfrm_dev_down(struct net_device *dev) +{ + if (dev->hw_features & NETIF_F_HW_ESP) + xfrm_dev_state_flush(dev_net(dev), dev, true); + + xfrm_garbage_collect(dev_net(dev)); + + return NOTIFY_DONE; +} + static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (event) { + case NETDEV_REGISTER: + return xfrm_dev_register(dev); + + case NETDEV_UNREGISTER: + return xfrm_dev_unregister(dev); + + case NETDEV_FEAT_CHANGE: + return xfrm_dev_feat_change(dev); + case NETDEV_DOWN: - xfrm_garbage_collect(dev_net(dev)); + return xfrm_dev_down(dev); } return NOTIFY_DONE; } diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 46bdb4fbed0b..362d655eac27 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -107,6 +107,8 @@ struct sec_path *secpath_dup(struct sec_path *src) sp->len = 0; sp->olen = 0; + memset(sp->ovec, 0, sizeof(sp->ovec[XFRM_MAX_OFFLOAD_DEPTH])); + if (src) { int i; @@ -207,8 +209,9 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) unsigned int family; int decaps = 0; int async = 0; - struct xfrm_offload *xo; bool xfrm_gro = false; + bool crypto_done = false; + struct xfrm_offload *xo = xfrm_offload(skb); if (encap_type < 0) { x = xfrm_input_state(skb); @@ -226,6 +229,37 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto lock; } + if (xo && (xo->flags & CRYPTO_DONE)) { + crypto_done = true; + x = xfrm_input_state(skb); + family = XFRM_SPI_SKB_CB(skb)->family; + + if (!(xo->status & CRYPTO_SUCCESS)) { + if (xo->status & + (CRYPTO_TRANSPORT_AH_AUTH_FAILED | + CRYPTO_TRANSPORT_ESP_AUTH_FAILED | + CRYPTO_TUNNEL_AH_AUTH_FAILED | + CRYPTO_TUNNEL_ESP_AUTH_FAILED)) { + + xfrm_audit_state_icvfail(x, skb, + x->type->proto); + x->stats.integrity_failed++; + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR); + goto drop; + } + + XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); + goto drop; + } + + if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); + goto drop; + } + + goto lock; + } + daddr = (xfrm_address_t *)(skb_network_header(skb) + XFRM_SPI_SKB_CB(skb)->daddroff); family = XFRM_SPI_SKB_CB(skb)->family; @@ -311,7 +345,10 @@ lock: skb_dst_force(skb); dev_hold(skb->dev); - nexthdr = x->type->input(x, skb); + if (crypto_done) + nexthdr = x->type_offload->input_tail(x, skb); + else + nexthdr = x->type->input(x, skb); if (nexthdr == -EINPROGRESS) return 0; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 8ba29fe58352..a15088613a6c 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -99,12 +99,13 @@ static int xfrm_output_one(struct sk_buff *skb, int err) skb_dst_force(skb); - /* Inner headers are invalid now. */ - skb->encapsulation = 0; - - err = x->type->output(x, skb); - if (err == -EINPROGRESS) - goto out; + if (xfrm_offload(skb)) { + x->type_offload->encap(x, skb); + } else { + err = x->type->output(x, skb); + if (err == -EINPROGRESS) + goto out; + } resume: if (err) { @@ -200,8 +201,38 @@ static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb int xfrm_output(struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(skb_dst(skb)->dev); + struct xfrm_state *x = skb_dst(skb)->xfrm; int err; + secpath_reset(skb); + + if (xfrm_dev_offload_ok(skb, x)) { + struct sec_path *sp; + + sp = secpath_dup(skb->sp); + if (!sp) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); + kfree_skb(skb); + return -ENOMEM; + } + if (skb->sp) + secpath_put(skb->sp); + skb->sp = sp; + + sp->olen++; + sp->xvec[skb->sp->len++] = x; + xfrm_state_hold(x); + + if (skb_is_gso(skb)) { + skb_shinfo(skb)->gso_type |= SKB_GSO_ESP; + + return xfrm_output2(net, sk, skb); + } + + if (x->xso.dev && x->xso.dev->features & NETIF_F_HW_ESP_TX_CSUM) + goto out; + } + if (skb_is_gso(skb)) return xfrm_output_gso(net, sk, skb); @@ -214,6 +245,7 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) } } +out: return xfrm_output2(net, sk, skb); } EXPORT_SYMBOL_GPL(xfrm_output); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7befca2a0773..dd44ddc1aea5 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -116,11 +116,10 @@ static const struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short fa return afinfo; } -static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, - int tos, int oif, - const xfrm_address_t *saddr, - const xfrm_address_t *daddr, - int family) +struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif, + const xfrm_address_t *saddr, + const xfrm_address_t *daddr, + int family) { const struct xfrm_policy_afinfo *afinfo; struct dst_entry *dst; @@ -135,6 +134,7 @@ static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, return dst; } +EXPORT_SYMBOL(__xfrm_dst_lookup); static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos, int oif, diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 47fefe97d1e3..fc3c5aa38754 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -440,6 +440,7 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) x->type->destructor(x); xfrm_put_type(x->type); } + xfrm_dev_state_free(x); security_xfrm_state_free(x); kfree(x); } @@ -609,6 +610,8 @@ int __xfrm_state_delete(struct xfrm_state *x) net->xfrm.state_num--; spin_unlock(&net->xfrm.xfrm_state_lock); + xfrm_dev_state_delete(x); + /* All xfrm_state objects are created by xfrm_state_alloc. * The xfrm_state_alloc call gives a reference, and that * is what we are dropping here. @@ -653,12 +656,41 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid) return err; } + +static inline int +xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid) +{ + int i, err = 0; + + for (i = 0; i <= net->xfrm.state_hmask; i++) { + struct xfrm_state *x; + struct xfrm_state_offload *xso; + + hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { + xso = &x->xso; + + if (xso->dev == dev && + (err = security_xfrm_state_delete(x)) != 0) { + xfrm_audit_state_delete(x, 0, task_valid); + return err; + } + } + } + + return err; +} #else static inline int xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid) { return 0; } + +static inline int +xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid) +{ + return 0; +} #endif int xfrm_state_flush(struct net *net, u8 proto, bool task_valid) @@ -701,6 +733,48 @@ out: } EXPORT_SYMBOL(xfrm_state_flush); +int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid) +{ + int i, err = 0, cnt = 0; + + spin_lock_bh(&net->xfrm.xfrm_state_lock); + err = xfrm_dev_state_flush_secctx_check(net, dev, task_valid); + if (err) + goto out; + + err = -ESRCH; + for (i = 0; i <= net->xfrm.state_hmask; i++) { + struct xfrm_state *x; + struct xfrm_state_offload *xso; +restart: + hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) { + xso = &x->xso; + + if (!xfrm_state_kern(x) && xso->dev == dev) { + xfrm_state_hold(x); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); + + err = xfrm_state_delete(x); + xfrm_audit_state_delete(x, err ? 0 : 1, + task_valid); + xfrm_state_put(x); + if (!err) + cnt++; + + spin_lock_bh(&net->xfrm.xfrm_state_lock); + goto restart; + } + } + } + if (cnt) + err = 0; + +out: + spin_unlock_bh(&net->xfrm.xfrm_state_lock); + return err; +} +EXPORT_SYMBOL(xfrm_dev_state_flush); + void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si) { spin_lock_bh(&net->xfrm.xfrm_state_lock); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 4f7e62ddc17e..de3332e3f9e2 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -595,6 +595,10 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, goto error; } + if (attrs[XFRMA_OFFLOAD_DEV] && + xfrm_dev_state_add(net, x, nla_data(attrs[XFRMA_OFFLOAD_DEV]))) + goto error; + if ((err = xfrm_alloc_replay_state_esn(&x->replay_esn, &x->preplay_esn, attrs[XFRMA_REPLAY_ESN_VAL]))) goto error; @@ -779,6 +783,23 @@ static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb) return 0; } +static int copy_user_offload(struct xfrm_state_offload *xso, struct sk_buff *skb) +{ + struct xfrm_user_offload *xuo; + struct nlattr *attr; + + attr = nla_reserve(skb, XFRMA_OFFLOAD_DEV, sizeof(*xuo)); + if (attr == NULL) + return -EMSGSIZE; + + xuo = nla_data(attr); + + xuo->ifindex = xso->dev->ifindex; + xuo->flags = xso->flags; + + return 0; +} + static int copy_to_user_auth(struct xfrm_algo_auth *auth, struct sk_buff *skb) { struct xfrm_algo *algo; @@ -869,6 +890,10 @@ static int copy_to_user_state_extra(struct xfrm_state *x, &x->replay); if (ret) goto out; + if(x->xso.dev) + ret = copy_user_offload(&x->xso, skb); + if (ret) + goto out; if (x->security) ret = copy_sec_ctx(x->security, skb); out: @@ -2406,6 +2431,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_SA_EXTRA_FLAGS] = { .type = NLA_U32 }, [XFRMA_PROTO] = { .type = NLA_U8 }, [XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) }, + [XFRMA_OFFLOAD_DEV] = { .len = sizeof(struct xfrm_user_offload) }, }; static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = { @@ -2622,6 +2648,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x) l += nla_total_size(sizeof(*x->coaddr)); if (x->props.extra_flags) l += nla_total_size(sizeof(x->props.extra_flags)); + if (x->xso.dev) + l += nla_total_size(sizeof(x->xso)); /* Must count x->lastused as it may become non-zero behind our back. */ l += nla_total_size_64bit(sizeof(u64)); -- cgit v1.2.3-59-g8ed1b From 7862b4058b9f10c9177f347e7d981511bac87213 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 14 Apr 2017 10:06:50 +0200 Subject: esp: Add gso handlers for esp4 and esp6 This patch extends the xfrm_type by an encap function pointer and implements esp4_gso_encap and esp6_gso_encap. These functions doing the basic esp encapsulation for a GSO packet. In case the GSO packet needs to be segmented in software, we add gso_segment functions. This codepath is going to be used on esp hardware offloads. Signed-off-by: Steffen Klassert --- net/ipv4/esp4.c | 10 +++++- net/ipv4/esp4_offload.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv6/esp6.c | 8 +++-- net/ipv6/esp6_offload.c | 93 +++++++++++++++++++++++++++++++++++++++++++++++++ net/xfrm/xfrm_replay.c | 3 +- 5 files changed, 203 insertions(+), 4 deletions(-) (limited to 'net/xfrm') diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 91e6a402e22e..4382f306935b 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -161,11 +161,19 @@ static struct ip_esp_hdr *esp_output_set_extra(struct sk_buff *skb, * encryption. */ if ((x->props.flags & XFRM_STATE_ESN)) { + __u32 seqhi; + struct xfrm_offload *xo = xfrm_offload(skb); + + if (xo) + seqhi = xo->seq.hi; + else + seqhi = XFRM_SKB_CB(skb)->seq.output.hi; + extra->esphoff = (unsigned char *)esph - skb_transport_header(skb); esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4); extra->seqhi = esph->spi; - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi); + esph->seq_no = htonl(seqhi); } esph->spi = x->id.spi; diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index efaaa44e1073..1e39564cb8b4 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -84,6 +84,97 @@ out: return NULL; } +static void esp4_gso_encap(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ip_esp_hdr *esph; + struct iphdr *iph = ip_hdr(skb); + struct xfrm_offload *xo = xfrm_offload(skb); + int proto = iph->protocol; + + skb_push(skb, -skb_network_offset(skb)); + esph = ip_esp_hdr(skb); + *skb_mac_header(skb) = IPPROTO_ESP; + + esph->spi = x->id.spi; + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); + + xo->proto = proto; +} + +static struct sk_buff *esp4_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + __u32 seq; + int err = 0; + struct sk_buff *skb2; + struct xfrm_state *x; + struct ip_esp_hdr *esph; + struct crypto_aead *aead; + struct sk_buff *segs = ERR_PTR(-EINVAL); + netdev_features_t esp_features = features; + struct xfrm_offload *xo = xfrm_offload(skb); + + if (!xo) + goto out; + + seq = xo->seq.low; + + x = skb->sp->xvec[skb->sp->len - 1]; + aead = x->data; + esph = ip_esp_hdr(skb); + + if (esph->spi != x->id.spi) + goto out; + + if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead))) + goto out; + + __skb_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead)); + + skb->encap_hdr_csum = 1; + + if (!(features & NETIF_F_HW_ESP)) + esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK); + + segs = x->outer_mode->gso_segment(x, skb, esp_features); + if (IS_ERR_OR_NULL(segs)) + goto out; + + __skb_pull(skb, skb->data - skb_mac_header(skb)); + + skb2 = segs; + do { + struct sk_buff *nskb = skb2->next; + + xo = xfrm_offload(skb2); + xo->flags |= XFRM_GSO_SEGMENT; + xo->seq.low = seq; + xo->seq.hi = xfrm_replay_seqhi(x, seq); + + if(!(features & NETIF_F_HW_ESP)) + xo->flags |= CRYPTO_FALLBACK; + + x->outer_mode->xmit(x, skb2); + + err = x->type_offload->xmit(x, skb2, esp_features); + if (err) { + kfree_skb_list(segs); + return ERR_PTR(err); + } + + if (!skb_is_gso(skb2)) + seq++; + else + seq += skb_shinfo(skb2)->gso_segs; + + skb_push(skb2, skb2->mac_len); + skb2 = nskb; + } while (skb2); + +out: + return segs; +} + static int esp_input_tail(struct xfrm_state *x, struct sk_buff *skb) { struct crypto_aead *aead = x->data; @@ -173,6 +264,7 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_ static const struct net_offload esp4_offload = { .callbacks = { .gro_receive = esp4_gro_receive, + .gso_segment = esp4_gso_segment, }, }; @@ -182,6 +274,7 @@ static const struct xfrm_type_offload esp_type_offload = { .proto = IPPROTO_ESP, .input_tail = esp_input_tail, .xmit = esp_xmit, + .encap = esp4_gso_encap, }; static int __init esp4_offload_init(void) diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index cc654a7afd91..82d3da81293e 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -179,9 +179,14 @@ static struct ip_esp_hdr *esp_output_set_esn(struct sk_buff *skb, * encryption. */ if ((x->props.flags & XFRM_STATE_ESN)) { + struct xfrm_offload *xo = xfrm_offload(skb); + esph = (void *)(skb_transport_header(skb) - sizeof(__be32)); *seqhi = esph->spi; - esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi); + if (xo) + esph->seq_no = htonl(xo->seq.hi); + else + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi); } esph->spi = x->id.spi; @@ -223,7 +228,6 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info struct sk_buff *trailer; int tailen = esp->tailen; - *skb_mac_header(skb) = IPPROTO_ESP; esph = ip_esp_hdr(skb); if (!skb_cloned(skb)) { diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 5dd20c0de956..06e972135ab0 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -86,6 +86,97 @@ out: return NULL; } +static void esp6_gso_encap(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ip_esp_hdr *esph; + struct ipv6hdr *iph = ipv6_hdr(skb); + struct xfrm_offload *xo = xfrm_offload(skb); + int proto = iph->nexthdr; + + skb_push(skb, -skb_network_offset(skb)); + esph = ip_esp_hdr(skb); + *skb_mac_header(skb) = IPPROTO_ESP; + + esph->spi = x->id.spi; + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); + + xo->proto = proto; +} + +static struct sk_buff *esp6_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + __u32 seq; + int err = 0; + struct sk_buff *skb2; + struct xfrm_state *x; + struct ip_esp_hdr *esph; + struct crypto_aead *aead; + struct sk_buff *segs = ERR_PTR(-EINVAL); + netdev_features_t esp_features = features; + struct xfrm_offload *xo = xfrm_offload(skb); + + if (xo) + goto out; + + seq = xo->seq.low; + + x = skb->sp->xvec[skb->sp->len - 1]; + aead = x->data; + esph = ip_esp_hdr(skb); + + if (esph->spi != x->id.spi) + goto out; + + if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead))) + goto out; + + __skb_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead)); + + skb->encap_hdr_csum = 1; + + if (!(features & NETIF_F_HW_ESP)) + esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK); + + segs = x->outer_mode->gso_segment(x, skb, esp_features); + if (IS_ERR_OR_NULL(segs)) + goto out; + + __skb_pull(skb, skb->data - skb_mac_header(skb)); + + skb2 = segs; + do { + struct sk_buff *nskb = skb2->next; + + xo = xfrm_offload(skb2); + xo->flags |= XFRM_GSO_SEGMENT; + xo->seq.low = seq; + xo->seq.hi = xfrm_replay_seqhi(x, seq); + + if(!(features & NETIF_F_HW_ESP)) + xo->flags |= CRYPTO_FALLBACK; + + x->outer_mode->xmit(x, skb2); + + err = x->type_offload->xmit(x, skb2, esp_features); + if (err) { + kfree_skb_list(segs); + return ERR_PTR(err); + } + + if (!skb_is_gso(skb2)) + seq++; + else + seq += skb_shinfo(skb2)->gso_segs; + + skb_push(skb2, skb2->mac_len); + skb2 = nskb; + } while (skb2); + +out: + return segs; +} + static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb) { struct crypto_aead *aead = x->data; @@ -176,6 +267,7 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features static const struct net_offload esp6_offload = { .callbacks = { .gro_receive = esp6_gro_receive, + .gso_segment = esp6_gso_segment, }, }; @@ -185,6 +277,7 @@ static const struct xfrm_type_offload esp6_type_offload = { .proto = IPPROTO_ESP, .input_tail = esp6_input_tail, .xmit = esp6_xmit, + .encap = esp6_gso_encap, }; static int __init esp6_offload_init(void) diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index cdc2e2e71bff..20e68a3cad9a 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -45,7 +45,8 @@ u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq) return seq_hi; } - +EXPORT_SYMBOL(xfrm_replay_seqhi); +; static void xfrm_replay_notify(struct xfrm_state *x, int event) { struct km_event c; -- cgit v1.2.3-59-g8ed1b From d7dbefc45cf5517a0cf9a0391316fab0abe71bd2 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 14 Apr 2017 10:07:01 +0200 Subject: xfrm: Add xfrm_replay_overflow functions for offloading This patch adds functions that handles IPsec sequence numbers for GSO segments and TSO offloading. We need to calculate and update the sequence numbers based on the segments that GSO/TSO will generate. We need this to keep software and hardware sequence number counter in sync. Signed-off-by: Steffen Klassert --- net/xfrm/xfrm_replay.c | 159 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 157 insertions(+), 2 deletions(-) (limited to 'net/xfrm') diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 20e68a3cad9a..8b23c5bcf8e8 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -559,6 +559,158 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) x->repl->notify(x, XFRM_REPLAY_UPDATE); } +#ifdef CONFIG_XFRM_OFFLOAD +static int xfrm_replay_overflow_offload(struct xfrm_state *x, struct sk_buff *skb) +{ + int err = 0; + struct net *net = xs_net(x); + struct xfrm_offload *xo = xfrm_offload(skb); + __u32 oseq = x->replay.oseq; + + if (!xo) + return xfrm_replay_overflow(x, skb); + + if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { + if (!skb_is_gso(skb)) { + XFRM_SKB_CB(skb)->seq.output.low = ++oseq; + xo->seq.low = oseq; + } else { + XFRM_SKB_CB(skb)->seq.output.low = oseq + 1; + xo->seq.low = oseq + 1; + oseq += skb_shinfo(skb)->gso_segs; + } + + XFRM_SKB_CB(skb)->seq.output.hi = 0; + xo->seq.hi = 0; + if (unlikely(oseq < x->replay.oseq)) { + xfrm_audit_state_replay_overflow(x, skb); + err = -EOVERFLOW; + + return err; + } + + x->replay.oseq = oseq; + + if (xfrm_aevent_is_on(net)) + x->repl->notify(x, XFRM_REPLAY_UPDATE); + } + + return err; +} + +static int xfrm_replay_overflow_offload_bmp(struct xfrm_state *x, struct sk_buff *skb) +{ + int err = 0; + struct xfrm_offload *xo = xfrm_offload(skb); + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + struct net *net = xs_net(x); + __u32 oseq = replay_esn->oseq; + + if (!xo) + return xfrm_replay_overflow_bmp(x, skb); + + if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { + if (!skb_is_gso(skb)) { + XFRM_SKB_CB(skb)->seq.output.low = ++oseq; + xo->seq.low = oseq; + } else { + XFRM_SKB_CB(skb)->seq.output.low = oseq + 1; + xo->seq.low = oseq + 1; + oseq += skb_shinfo(skb)->gso_segs; + } + + XFRM_SKB_CB(skb)->seq.output.hi = 0; + xo->seq.hi = 0; + if (unlikely(oseq < replay_esn->oseq)) { + xfrm_audit_state_replay_overflow(x, skb); + err = -EOVERFLOW; + + return err; + } else { + replay_esn->oseq = oseq; + } + + if (xfrm_aevent_is_on(net)) + x->repl->notify(x, XFRM_REPLAY_UPDATE); + } + + return err; +} + +static int xfrm_replay_overflow_offload_esn(struct xfrm_state *x, struct sk_buff *skb) +{ + int err = 0; + struct xfrm_offload *xo = xfrm_offload(skb); + struct xfrm_replay_state_esn *replay_esn = x->replay_esn; + struct net *net = xs_net(x); + __u32 oseq = replay_esn->oseq; + __u32 oseq_hi = replay_esn->oseq_hi; + + if (!xo) + return xfrm_replay_overflow_esn(x, skb); + + if (x->type->flags & XFRM_TYPE_REPLAY_PROT) { + if (!skb_is_gso(skb)) { + XFRM_SKB_CB(skb)->seq.output.low = ++oseq; + XFRM_SKB_CB(skb)->seq.output.hi = oseq_hi; + xo->seq.low = oseq; + xo->seq.hi = oseq_hi; + } else { + XFRM_SKB_CB(skb)->seq.output.low = oseq + 1; + XFRM_SKB_CB(skb)->seq.output.hi = oseq_hi; + xo->seq.low = oseq = oseq + 1; + xo->seq.hi = oseq_hi; + oseq += skb_shinfo(skb)->gso_segs; + } + + if (unlikely(oseq < replay_esn->oseq)) { + XFRM_SKB_CB(skb)->seq.output.hi = ++oseq_hi; + xo->seq.hi = oseq_hi; + + if (replay_esn->oseq_hi == 0) { + replay_esn->oseq--; + replay_esn->oseq_hi--; + xfrm_audit_state_replay_overflow(x, skb); + err = -EOVERFLOW; + + return err; + } + } + + replay_esn->oseq = oseq; + replay_esn->oseq_hi = oseq_hi; + + if (xfrm_aevent_is_on(net)) + x->repl->notify(x, XFRM_REPLAY_UPDATE); + } + + return err; +} + +static const struct xfrm_replay xfrm_replay_legacy = { + .advance = xfrm_replay_advance, + .check = xfrm_replay_check, + .recheck = xfrm_replay_check, + .notify = xfrm_replay_notify, + .overflow = xfrm_replay_overflow_offload, +}; + +static const struct xfrm_replay xfrm_replay_bmp = { + .advance = xfrm_replay_advance_bmp, + .check = xfrm_replay_check_bmp, + .recheck = xfrm_replay_check_bmp, + .notify = xfrm_replay_notify_bmp, + .overflow = xfrm_replay_overflow_offload_bmp, +}; + +static const struct xfrm_replay xfrm_replay_esn = { + .advance = xfrm_replay_advance_esn, + .check = xfrm_replay_check_esn, + .recheck = xfrm_replay_recheck_esn, + .notify = xfrm_replay_notify_esn, + .overflow = xfrm_replay_overflow_offload_esn, +}; +#else static const struct xfrm_replay xfrm_replay_legacy = { .advance = xfrm_replay_advance, .check = xfrm_replay_check, @@ -582,6 +734,7 @@ static const struct xfrm_replay xfrm_replay_esn = { .notify = xfrm_replay_notify_esn, .overflow = xfrm_replay_overflow_esn, }; +#endif int xfrm_init_replay(struct xfrm_state *x) { @@ -596,10 +749,12 @@ int xfrm_init_replay(struct xfrm_state *x) if (replay_esn->replay_window == 0) return -EINVAL; x->repl = &xfrm_replay_esn; - } else + } else { x->repl = &xfrm_replay_bmp; - } else + } + } else { x->repl = &xfrm_replay_legacy; + } return 0; } -- cgit v1.2.3-59-g8ed1b From f6e27114a60a0afdec40db1bf7f6da37b565745a Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 14 Apr 2017 10:07:28 +0200 Subject: net: Add a xfrm validate function to validate_xmit_skb When we do IPsec offloading, we need a fallback for packets that were targeted to be IPsec offloaded but rerouted to a device that does not support IPsec offload. For that we add a function that checks the offloading features of the sending device and and flags the requirement of a fallback before it calls the IPsec output function. The IPsec output function adds the IPsec trailer and does encryption if needed. Signed-off-by: Steffen Klassert --- include/net/xfrm.h | 6 ++++++ net/core/dev.c | 3 +++ net/xfrm/xfrm_device.c | 29 +++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+) (limited to 'net/xfrm') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 17603bf190c1..6793a30c66b1 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1862,6 +1862,7 @@ static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb) #ifdef CONFIG_XFRM_OFFLOAD void __net_init xfrm_dev_init(void); +int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features); int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo); bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x); @@ -1890,6 +1891,11 @@ static inline void __net_init xfrm_dev_init(void) { } +static inline int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features) +{ + return 0; +} + static inline int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo) { return 0; diff --git a/net/core/dev.c b/net/core/dev.c index ef9fe60ee294..5f0a864623e8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2972,6 +2972,9 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device __skb_linearize(skb)) goto out_kfree_skb; + if (validate_xmit_xfrm(skb, features)) + goto out_kfree_skb; + /* If packet is not checksummed and device does not * support checksumming for this protocol, complete * checksumming here. diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 9bac2ba9052c..8ec8a3fcf8d4 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -22,6 +22,35 @@ #include #include +int validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t features) +{ + int err; + struct xfrm_state *x; + struct xfrm_offload *xo = xfrm_offload(skb); + + if (skb_is_gso(skb)) + return 0; + + if (xo) { + x = skb->sp->xvec[skb->sp->len - 1]; + if (xo->flags & XFRM_GRO || x->xso.flags & XFRM_OFFLOAD_INBOUND) + return 0; + + x->outer_mode->xmit(x, skb); + + err = x->type_offload->xmit(x, skb, features); + if (err) { + XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR); + return err; + } + + skb_push(skb, skb->data - skb_mac_header(skb)); + } + + return 0; +} +EXPORT_SYMBOL_GPL(validate_xmit_xfrm); + int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo) { -- cgit v1.2.3-59-g8ed1b From f1bd7d659ef0ba0f18c6f6afe7bbbd2410acffa0 Mon Sep 17 00:00:00 2001 From: Ilan Tayari Date: Fri, 14 Apr 2017 10:07:39 +0200 Subject: xfrm: Add encapsulation header offsets while SKB is not encrypted Both esp4 and esp6 used to assume that the SKB payload is encrypted and therefore the inner_network and inner_transport offsets are not relevant. When doing crypto offload in the NIC, this is no longer the case and the NIC driver needs these offsets so it can do TX TCP checksum offloading. This patch sets the inner_network and inner_transport members of the SKB, as well as encapsulation, to reflect the actual positions of these headers, and removes them only once encryption is done on the payload. Signed-off-by: Ilan Tayari Signed-off-by: Steffen Klassert --- net/ipv4/xfrm4_mode_transport.c | 2 ++ net/ipv4/xfrm4_mode_tunnel.c | 3 +++ net/ipv6/xfrm6_mode_transport.c | 1 + net/ipv6/xfrm6_mode_tunnel.c | 3 +++ net/xfrm/xfrm_output.c | 2 ++ 5 files changed, 11 insertions(+) (limited to 'net/xfrm') diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c index 6c2411d09386..3d36644890bb 100644 --- a/net/ipv4/xfrm4_mode_transport.c +++ b/net/ipv4/xfrm4_mode_transport.c @@ -24,6 +24,8 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) struct iphdr *iph = ip_hdr(skb); int ihl = iph->ihl * 4; + skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + skb_set_network_header(skb, -x->props.header_len); skb->mac_header = skb->network_header + offsetof(struct iphdr, protocol); diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index d3f2434fa0b8..e6265e2c274e 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -33,6 +33,9 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) struct iphdr *top_iph; int flags; + skb_set_inner_network_header(skb, skb_network_offset(skb)); + skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + skb_set_network_header(skb, -x->props.header_len); skb->mac_header = skb->network_header + offsetof(struct iphdr, protocol); diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c index eb9b36b06c1d..7a92c0f31912 100644 --- a/net/ipv6/xfrm6_mode_transport.c +++ b/net/ipv6/xfrm6_mode_transport.c @@ -27,6 +27,7 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) int hdr_len; iph = ipv6_hdr(skb); + skb_set_inner_transport_header(skb, skb_transport_offset(skb)); hdr_len = x->type->hdr_offset(x, skb, &prevhdr); skb_set_mac_header(skb, (prevhdr - x->props.header_len) - skb->data); diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 19a60fc4c29b..02556e356f87 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -36,6 +36,9 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) struct ipv6hdr *top_iph; int dsfield; + skb_set_inner_network_header(skb, skb_network_offset(skb)); + skb_set_inner_transport_header(skb, skb_transport_offset(skb)); + skb_set_network_header(skb, -x->props.header_len); skb->mac_header = skb->network_header + offsetof(struct ipv6hdr, nexthdr); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index a15088613a6c..8c0b6722aaa8 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -205,6 +205,7 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) int err; secpath_reset(skb); + skb->encapsulation = 0; if (xfrm_dev_offload_ok(skb, x)) { struct sec_path *sp; @@ -218,6 +219,7 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) if (skb->sp) secpath_put(skb->sp); skb->sp = sp; + skb->encapsulation = 1; sp->olen++; sp->xvec[skb->sp->len++] = x; -- cgit v1.2.3-59-g8ed1b From bcd1f8a45e7d5804e4f7bd78a91348cfce3cb74a Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 14 Apr 2017 10:07:49 +0200 Subject: xfrm: Prepare the GRO codepath for hardware offloading. On IPsec hardware offloading, we already get a secpath with valid state attached when the packet enters the GRO handlers. So check for hardware offload and skip the state lookup in this case. Signed-off-by: Steffen Klassert --- net/ipv4/esp4_offload.c | 42 ++++++++++++++++++++++------------------- net/ipv6/esp6_offload.c | 42 ++++++++++++++++++++++------------------- net/xfrm/xfrm_input.c | 50 ++++++++++++++++++++++++------------------------- 3 files changed, 71 insertions(+), 63 deletions(-) (limited to 'net/xfrm') diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 1e39564cb8b4..f3e33c26dc33 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -43,27 +43,31 @@ static struct sk_buff **esp4_gro_receive(struct sk_buff **head, if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0) goto out; - err = secpath_set(skb); - if (err) - goto out; - - if (skb->sp->len == XFRM_MAX_DEPTH) - goto out; - - x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, - (xfrm_address_t *)&ip_hdr(skb)->daddr, - spi, IPPROTO_ESP, AF_INET); - if (!x) - goto out; - - skb->sp->xvec[skb->sp->len++] = x; - skb->sp->olen++; - xo = xfrm_offload(skb); - if (!xo) { - xfrm_state_put(x); - goto out; + if (!xo || !(xo->flags & CRYPTO_DONE)) { + err = secpath_set(skb); + if (err) + goto out; + + if (skb->sp->len == XFRM_MAX_DEPTH) + goto out; + + x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, + (xfrm_address_t *)&ip_hdr(skb)->daddr, + spi, IPPROTO_ESP, AF_INET); + if (!x) + goto out; + + skb->sp->xvec[skb->sp->len++] = x; + skb->sp->olen++; + + xo = xfrm_offload(skb); + if (!xo) { + xfrm_state_put(x); + goto out; + } } + xo->flags |= XFRM_GRO; XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index 06e972135ab0..1cceeee7cc33 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -45,27 +45,31 @@ static struct sk_buff **esp6_gro_receive(struct sk_buff **head, if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0) goto out; - err = secpath_set(skb); - if (err) - goto out; - - if (skb->sp->len == XFRM_MAX_DEPTH) - goto out; - - x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, - (xfrm_address_t *)&ipv6_hdr(skb)->daddr, - spi, IPPROTO_ESP, AF_INET6); - if (!x) - goto out; - - skb->sp->xvec[skb->sp->len++] = x; - skb->sp->olen++; - xo = xfrm_offload(skb); - if (!xo) { - xfrm_state_put(x); - goto out; + if (!xo || !(xo->flags & CRYPTO_DONE)) { + err = secpath_set(skb); + if (err) + goto out; + + if (skb->sp->len == XFRM_MAX_DEPTH) + goto out; + + x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, + (xfrm_address_t *)&ipv6_hdr(skb)->daddr, + spi, IPPROTO_ESP, AF_INET6); + if (!x) + goto out; + + skb->sp->xvec[skb->sp->len++] = x; + skb->sp->olen++; + + xo = xfrm_offload(skb); + if (!xo) { + xfrm_state_put(x); + goto out; + } } + xo->flags |= XFRM_GRO; XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 362d655eac27..21c6cc965402 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -223,38 +223,38 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) seq = XFRM_SKB_CB(skb)->seq.input.low; goto resume; } + /* encap_type < -1 indicates a GRO call. */ encap_type = 0; seq = XFRM_SPI_SKB_CB(skb)->seq; - goto lock; - } - - if (xo && (xo->flags & CRYPTO_DONE)) { - crypto_done = true; - x = xfrm_input_state(skb); - family = XFRM_SPI_SKB_CB(skb)->family; - if (!(xo->status & CRYPTO_SUCCESS)) { - if (xo->status & - (CRYPTO_TRANSPORT_AH_AUTH_FAILED | - CRYPTO_TRANSPORT_ESP_AUTH_FAILED | - CRYPTO_TUNNEL_AH_AUTH_FAILED | - CRYPTO_TUNNEL_ESP_AUTH_FAILED)) { - - xfrm_audit_state_icvfail(x, skb, - x->type->proto); - x->stats.integrity_failed++; - XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR); + if (xo && (xo->flags & CRYPTO_DONE)) { + crypto_done = true; + x = xfrm_input_state(skb); + family = XFRM_SPI_SKB_CB(skb)->family; + + if (!(xo->status & CRYPTO_SUCCESS)) { + if (xo->status & + (CRYPTO_TRANSPORT_AH_AUTH_FAILED | + CRYPTO_TRANSPORT_ESP_AUTH_FAILED | + CRYPTO_TUNNEL_AH_AUTH_FAILED | + CRYPTO_TUNNEL_ESP_AUTH_FAILED)) { + + xfrm_audit_state_icvfail(x, skb, + x->type->proto); + x->stats.integrity_failed++; + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR); + goto drop; + } + + XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); goto drop; } - XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); - goto drop; - } - - if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); - goto drop; + if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); + goto drop; + } } goto lock; -- cgit v1.2.3-59-g8ed1b