aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2017-02-01 11:22:38 -0500
committerDavid S. Miller <davem@davemloft.net>2017-02-01 11:22:38 -0500
commit04cdf13e34e912dcab9a94f391e15b2c26dfd0a2 (patch)
tree0f0dcf018d5ce2298ed98ec854d6592370a4348a /net
parentMerge branch 'bgmac-phy-init' (diff)
parentxfrm: Add a dummy network device for napi. (diff)
downloadlinux-dev-04cdf13e34e912dcab9a94f391e15b2c26dfd0a2.tar.xz
linux-dev-04cdf13e34e912dcab9a94f391e15b2c26dfd0a2.zip
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next
Steffen Klassert says: ==================== pull request (net-next): ipsec-next 2017-02-01 1) Some typo fixes, from Alexander Alemayhu. 2) Don't acquire state lock in get_mtu functions. The only rece against a dead state does not matter. From Florian Westphal. 3) Remove xfrm4_state_fini, it is unused for more than 10 years. From Florian Westphal. 4) Various rcu usage improvements. From Florian Westphal. 5) Properly handle crypto arrors in ah4/ah6. From Gilad Ben-Yossef. 6) Try to avoid skb linearization in esp4 and esp6. 7) The esp trailer is now set up in different places, add a helper for this. 8) With the upcomming usage of gro_cells in IPsec, a gro merged skb can have a secpath. Drop it before freeing or reusing the skb. 9) Add a xfrm dummy network device for napi. With this we can use gro_cells from within xfrm, it allows IPsec GRO without impact on the generic networking code. Please pull or let me know if there are problems. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/core/dev.c2
-rw-r--r--net/ipv4/ah4.c3
-rw-r--r--net/ipv4/esp4.c332
-rw-r--r--net/ipv4/xfrm4_state.c8
-rw-r--r--net/ipv6/ah6.c3
-rw-r--r--net/ipv6/esp6.c318
-rw-r--r--net/xfrm/xfrm_input.c12
-rw-r--r--net/xfrm/xfrm_output.c8
-rw-r--r--net/xfrm/xfrm_policy.c2
-rw-r--r--net/xfrm/xfrm_state.c86
10 files changed, 575 insertions, 199 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index be11abac89b3..e61528c50209 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4593,6 +4593,7 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
case GRO_MERGED_FREE:
if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) {
skb_dst_drop(skb);
+ secpath_reset(skb);
kmem_cache_free(skbuff_head_cache, skb);
} else {
__kfree_skb(skb);
@@ -4633,6 +4634,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
skb->encapsulation = 0;
skb_shinfo(skb)->gso_type = 0;
skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
+ secpath_reset(skb);
napi->skb = skb;
}
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index f2a71025a770..22377c8ff14b 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -270,6 +270,9 @@ static void ah_input_done(struct crypto_async_request *base, int err)
int ihl = ip_hdrlen(skb);
int ah_hlen = (ah->hdrlen + 2) << 2;
+ if (err)
+ goto out;
+
work_iph = AH_SKB_CB(skb)->tmp;
auth_data = ah_tmp_auth(work_iph, ihl);
icv = ah_tmp_icv(ahp->ahash, auth_data, ahp->icv_trunc_len);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 20fb25e3027b..b1e24446e297 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -18,6 +18,8 @@
#include <net/protocol.h>
#include <net/udp.h>
+#include <linux/highmem.h>
+
struct esp_skb_cb {
struct xfrm_skb_cb xfrm;
void *tmp;
@@ -92,11 +94,40 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
__alignof__(struct scatterlist));
}
+static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
+{
+ struct esp_output_extra *extra = esp_tmp_extra(tmp);
+ struct crypto_aead *aead = x->data;
+ int extralen = 0;
+ u8 *iv;
+ struct aead_request *req;
+ struct scatterlist *sg;
+
+ if (x->props.flags & XFRM_STATE_ESN)
+ extralen += sizeof(*extra);
+
+ extra = esp_tmp_extra(tmp);
+ iv = esp_tmp_iv(aead, tmp, extralen);
+ req = esp_tmp_req(aead, iv);
+
+ /* Unref skb_frag_pages in the src scatterlist if necessary.
+ * Skip the first sg which comes from skb->data.
+ */
+ if (req->src != req->dst)
+ for (sg = sg_next(req->src); sg; sg = sg_next(sg))
+ put_page(sg_page(sg));
+}
+
static void esp_output_done(struct crypto_async_request *base, int err)
{
struct sk_buff *skb = base->data;
+ void *tmp;
+ struct dst_entry *dst = skb_dst(skb);
+ struct xfrm_state *x = dst->xfrm;
- kfree(ESP_SKB_CB(skb)->tmp);
+ tmp = ESP_SKB_CB(skb)->tmp;
+ esp_ssg_unref(x, tmp);
+ kfree(tmp);
xfrm_output_resume(skb, err);
}
@@ -120,6 +151,29 @@ static void esp_output_restore_header(struct sk_buff *skb)
sizeof(__be32));
}
+static struct ip_esp_hdr *esp_output_set_extra(struct sk_buff *skb,
+ struct ip_esp_hdr *esph,
+ struct esp_output_extra *extra)
+{
+ struct xfrm_state *x = skb_dst(skb)->xfrm;
+
+ /* For ESN we move the header forward by 4 bytes to
+ * accomodate the high bits. We will move it back after
+ * encryption.
+ */
+ if ((x->props.flags & XFRM_STATE_ESN)) {
+ extra->esphoff = (unsigned char *)esph -
+ skb_transport_header(skb);
+ esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4);
+ extra->seqhi = esph->spi;
+ esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+ }
+
+ esph->spi = x->id.spi;
+
+ return esph;
+}
+
static void esp_output_done_esn(struct crypto_async_request *base, int err)
{
struct sk_buff *skb = base->data;
@@ -128,18 +182,36 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err)
esp_output_done(base, err);
}
+static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto)
+{
+ /* Fill padding... */
+ if (tfclen) {
+ memset(tail, 0, tfclen);
+ tail += tfclen;
+ }
+ do {
+ int i;
+ for (i = 0; i < plen - 2; i++)
+ tail[i] = i + 1;
+ } while (0);
+ tail[plen - 2] = plen - 2;
+ tail[plen - 1] = proto;
+}
+
static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
{
- int err;
struct esp_output_extra *extra;
+ int err = -ENOMEM;
struct ip_esp_hdr *esph;
struct crypto_aead *aead;
struct aead_request *req;
- struct scatterlist *sg;
+ struct scatterlist *sg, *dsg;
struct sk_buff *trailer;
+ struct page *page;
void *tmp;
u8 *iv;
u8 *tail;
+ u8 *vaddr;
int blksize;
int clen;
int alen;
@@ -149,7 +221,9 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
int nfrags;
int assoclen;
int extralen;
+ int tailen;
__be64 seqno;
+ __u8 proto = *skb_mac_header(skb);
/* skb is pure payload to encrypt */
@@ -169,12 +243,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
blksize = ALIGN(crypto_aead_blocksize(aead), 4);
clen = ALIGN(skb->len + 2 + tfclen, blksize);
plen = clen - skb->len - tfclen;
-
- err = skb_cow_data(skb, tfclen + plen + alen, &trailer);
- if (err < 0)
- goto error;
- nfrags = err;
-
+ tailen = tfclen + plen + alen;
assoclen = sizeof(*esph);
extralen = 0;
@@ -183,35 +252,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
assoclen += sizeof(__be32);
}
- tmp = esp_alloc_tmp(aead, nfrags, extralen);
- if (!tmp) {
- err = -ENOMEM;
- goto error;
- }
-
- extra = esp_tmp_extra(tmp);
- iv = esp_tmp_iv(aead, tmp, extralen);
- req = esp_tmp_req(aead, iv);
- sg = esp_req_sg(aead, req);
-
- /* Fill padding... */
- tail = skb_tail_pointer(trailer);
- if (tfclen) {
- memset(tail, 0, tfclen);
- tail += tfclen;
- }
- do {
- int i;
- for (i = 0; i < plen - 2; i++)
- tail[i] = i + 1;
- } while (0);
- tail[plen - 2] = plen - 2;
- tail[plen - 1] = *skb_mac_header(skb);
- pskb_put(skb, trailer, clen - skb->len + alen);
-
- skb_push(skb, -skb_network_offset(skb));
- esph = ip_esp_hdr(skb);
*skb_mac_header(skb) = IPPROTO_ESP;
+ esph = ip_esp_hdr(skb);
/* this is non-NULL only with UDP Encapsulation */
if (x->encap) {
@@ -230,7 +272,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
uh = (struct udphdr *)esph;
uh->source = sport;
uh->dest = dport;
- uh->len = htons(skb->len - skb_transport_offset(skb));
+ uh->len = htons(skb->len + tailen
+ - skb_transport_offset(skb));
uh->check = 0;
switch (encap_type) {
@@ -248,31 +291,148 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
*skb_mac_header(skb) = IPPROTO_UDP;
}
- esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
+ if (!skb_cloned(skb)) {
+ if (tailen <= skb_availroom(skb)) {
+ nfrags = 1;
+ trailer = skb;
+ tail = skb_tail_pointer(trailer);
- aead_request_set_callback(req, 0, esp_output_done, skb);
+ goto skip_cow;
+ } else if ((skb_shinfo(skb)->nr_frags < MAX_SKB_FRAGS)
+ && !skb_has_frag_list(skb)) {
+ int allocsize;
+ struct sock *sk = skb->sk;
+ struct page_frag *pfrag = &x->xfrag;
- /* For ESN we move the header forward by 4 bytes to
- * accomodate the high bits. We will move it back after
- * encryption.
- */
- if ((x->props.flags & XFRM_STATE_ESN)) {
- extra->esphoff = (unsigned char *)esph -
- skb_transport_header(skb);
- esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4);
- extra->seqhi = esph->spi;
- esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
- aead_request_set_callback(req, 0, esp_output_done_esn, skb);
+ allocsize = ALIGN(tailen, L1_CACHE_BYTES);
+
+ spin_lock_bh(&x->lock);
+
+ if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) {
+ spin_unlock_bh(&x->lock);
+ goto cow;
+ }
+
+ page = pfrag->page;
+ get_page(page);
+
+ vaddr = kmap_atomic(page);
+
+ tail = vaddr + pfrag->offset;
+
+ esp_output_fill_trailer(tail, tfclen, plen, proto);
+
+ kunmap_atomic(vaddr);
+
+ nfrags = skb_shinfo(skb)->nr_frags;
+
+ __skb_fill_page_desc(skb, nfrags, page, pfrag->offset,
+ tailen);
+ skb_shinfo(skb)->nr_frags = ++nfrags;
+
+ pfrag->offset = pfrag->offset + allocsize;
+ nfrags++;
+
+ skb->len += tailen;
+ skb->data_len += tailen;
+ skb->truesize += tailen;
+ if (sk)
+ atomic_add(tailen, &sk->sk_wmem_alloc);
+
+ skb_push(skb, -skb_network_offset(skb));
+
+ esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
+ esph->spi = x->id.spi;
+
+ tmp = esp_alloc_tmp(aead, nfrags + 2, extralen);
+ if (!tmp) {
+ spin_unlock_bh(&x->lock);
+ err = -ENOMEM;
+ goto error;
+ }
+
+ extra = esp_tmp_extra(tmp);
+ iv = esp_tmp_iv(aead, tmp, extralen);
+ req = esp_tmp_req(aead, iv);
+ sg = esp_req_sg(aead, req);
+ dsg = &sg[nfrags];
+
+ esph = esp_output_set_extra(skb, esph, extra);
+
+ sg_init_table(sg, nfrags);
+ skb_to_sgvec(skb, sg,
+ (unsigned char *)esph - skb->data,
+ assoclen + ivlen + clen + alen);
+
+ allocsize = ALIGN(skb->data_len, L1_CACHE_BYTES);
+
+ if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) {
+ spin_unlock_bh(&x->lock);
+ err = -ENOMEM;
+ goto error;
+ }
+
+ skb_shinfo(skb)->nr_frags = 1;
+
+ page = pfrag->page;
+ get_page(page);
+ /* replace page frags in skb with new page */
+ __skb_fill_page_desc(skb, 0, page, pfrag->offset, skb->data_len);
+ pfrag->offset = pfrag->offset + allocsize;
+
+ sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1);
+ skb_to_sgvec(skb, dsg,
+ (unsigned char *)esph - skb->data,
+ assoclen + ivlen + clen + alen);
+
+ spin_unlock_bh(&x->lock);
+
+ goto skip_cow2;
+ }
}
+cow:
+ err = skb_cow_data(skb, tailen, &trailer);
+ if (err < 0)
+ goto error;
+ nfrags = err;
+ tail = skb_tail_pointer(trailer);
+ esph = ip_esp_hdr(skb);
+
+skip_cow:
+ esp_output_fill_trailer(tail, tfclen, plen, proto);
+
+ pskb_put(skb, trailer, clen - skb->len + alen);
+ skb_push(skb, -skb_network_offset(skb));
+ esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
esph->spi = x->id.spi;
+ tmp = esp_alloc_tmp(aead, nfrags, extralen);
+ if (!tmp) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ extra = esp_tmp_extra(tmp);
+ iv = esp_tmp_iv(aead, tmp, extralen);
+ req = esp_tmp_req(aead, iv);
+ sg = esp_req_sg(aead, req);
+ dsg = sg;
+
+ esph = esp_output_set_extra(skb, esph, extra);
+
sg_init_table(sg, nfrags);
skb_to_sgvec(skb, sg,
(unsigned char *)esph - skb->data,
assoclen + ivlen + clen + alen);
- aead_request_set_crypt(req, sg, sg, ivlen + clen, iv);
+skip_cow2:
+ if ((x->props.flags & XFRM_STATE_ESN))
+ aead_request_set_callback(req, 0, esp_output_done_esn, skb);
+ else
+ aead_request_set_callback(req, 0, esp_output_done, skb);
+
+ aead_request_set_crypt(req, sg, dsg, ivlen + clen, iv);
aead_request_set_ad(req, assoclen);
seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low +
@@ -298,6 +458,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb)
esp_output_restore_header(skb);
}
+ if (sg != dsg)
+ esp_ssg_unref(x, tmp);
kfree(tmp);
error:
@@ -401,6 +563,23 @@ static void esp_input_restore_header(struct sk_buff *skb)
__skb_pull(skb, 4);
}
+static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi)
+{
+ struct xfrm_state *x = xfrm_input_state(skb);
+ struct ip_esp_hdr *esph = (struct ip_esp_hdr *)skb->data;
+
+ /* For ESN we move the header forward by 4 bytes to
+ * accomodate the high bits. We will move it back after
+ * decryption.
+ */
+ if ((x->props.flags & XFRM_STATE_ESN)) {
+ esph = (void *)skb_push(skb, 4);
+ *seqhi = esph->spi;
+ esph->spi = esph->seq_no;
+ esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi;
+ }
+}
+
static void esp_input_done_esn(struct crypto_async_request *base, int err)
{
struct sk_buff *skb = base->data;
@@ -437,12 +616,6 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
if (elen <= 0)
goto out;
- err = skb_cow_data(skb, 0, &trailer);
- if (err < 0)
- goto out;
-
- nfrags = err;
-
assoclen = sizeof(*esph);
seqhilen = 0;
@@ -451,6 +624,26 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
assoclen += seqhilen;
}
+ if (!skb_cloned(skb)) {
+ if (!skb_is_nonlinear(skb)) {
+ nfrags = 1;
+
+ goto skip_cow;
+ } else if (!skb_has_frag_list(skb)) {
+ nfrags = skb_shinfo(skb)->nr_frags;
+ nfrags++;
+
+ goto skip_cow;
+ }
+ }
+
+ err = skb_cow_data(skb, 0, &trailer);
+ if (err < 0)
+ goto out;
+
+ nfrags = err;
+
+skip_cow:
err = -ENOMEM;
tmp = esp_alloc_tmp(aead, nfrags, seqhilen);
if (!tmp)
@@ -462,26 +655,17 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
req = esp_tmp_req(aead, iv);
sg = esp_req_sg(aead, req);
- skb->ip_summed = CHECKSUM_NONE;
+ esp_input_set_header(skb, seqhi);
- esph = (struct ip_esp_hdr *)skb->data;
+ sg_init_table(sg, nfrags);
+ skb_to_sgvec(skb, sg, 0, skb->len);
- aead_request_set_callback(req, 0, esp_input_done, skb);
+ skb->ip_summed = CHECKSUM_NONE;
- /* For ESN we move the header forward by 4 bytes to
- * accomodate the high bits. We will move it back after
- * decryption.
- */
- if ((x->props.flags & XFRM_STATE_ESN)) {
- esph = (void *)skb_push(skb, 4);
- *seqhi = esph->spi;
- esph->spi = esph->seq_no;
- esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi;
+ if ((x->props.flags & XFRM_STATE_ESN))
aead_request_set_callback(req, 0, esp_input_done_esn, skb);
- }
-
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg, 0, skb->len);
+ else
+ aead_request_set_callback(req, 0, esp_input_done, skb);
aead_request_set_crypt(req, sg, sg, elen + ivlen, iv);
aead_request_set_ad(req, assoclen);
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 542074c00c78..d6660a8c0ea5 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -90,11 +90,3 @@ void __init xfrm4_state_init(void)
{
xfrm_state_register_afinfo(&xfrm4_state_afinfo);
}
-
-#if 0
-void __exit xfrm4_state_fini(void)
-{
- xfrm_state_unregister_afinfo(&xfrm4_state_afinfo);
-}
-#endif /* 0 */
-
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 189eb10b742d..dda6035e3b84 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -474,6 +474,9 @@ static void ah6_input_done(struct crypto_async_request *base, int err)
int hdr_len = skb_network_header_len(skb);
int ah_hlen = (ah->hdrlen + 2) << 2;
+ if (err)
+ goto out;
+
work_iph = AH_SKB_CB(skb)->tmp;
auth_data = ah_tmp_auth(work_iph, hdr_len);
icv = ah_tmp_icv(ahp->ahash, auth_data, ahp->icv_trunc_len);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index cbcdd5db31f4..ff54faa75631 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -44,6 +44,8 @@
#include <net/protocol.h>
#include <linux/icmpv6.h>
+#include <linux/highmem.h>
+
struct esp_skb_cb {
struct xfrm_skb_cb xfrm;
void *tmp;
@@ -114,11 +116,40 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
__alignof__(struct scatterlist));
}
+static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
+{
+ __be32 *seqhi;
+ struct crypto_aead *aead = x->data;
+ int seqhilen = 0;
+ u8 *iv;
+ struct aead_request *req;
+ struct scatterlist *sg;
+
+ if (x->props.flags & XFRM_STATE_ESN)
+ seqhilen += sizeof(__be32);
+
+ seqhi = esp_tmp_seqhi(tmp);
+ iv = esp_tmp_iv(aead, tmp, seqhilen);
+ req = esp_tmp_req(aead, iv);
+
+ /* Unref skb_frag_pages in the src scatterlist if necessary.
+ * Skip the first sg which comes from skb->data.
+ */
+ if (req->src != req->dst)
+ for (sg = sg_next(req->src); sg; sg = sg_next(sg))
+ put_page(sg_page(sg));
+}
+
static void esp_output_done(struct crypto_async_request *base, int err)
{
struct sk_buff *skb = base->data;
+ void *tmp;
+ struct dst_entry *dst = skb_dst(skb);
+ struct xfrm_state *x = dst->xfrm;
- kfree(ESP_SKB_CB(skb)->tmp);
+ tmp = ESP_SKB_CB(skb)->tmp;
+ esp_ssg_unref(x, tmp);
+ kfree(tmp);
xfrm_output_resume(skb, err);
}
@@ -138,6 +169,27 @@ static void esp_output_restore_header(struct sk_buff *skb)
esp_restore_header(skb, skb_transport_offset(skb) - sizeof(__be32));
}
+static struct ip_esp_hdr *esp_output_set_esn(struct sk_buff *skb,
+ struct ip_esp_hdr *esph,
+ __be32 *seqhi)
+{
+ struct xfrm_state *x = skb_dst(skb)->xfrm;
+
+ /* For ESN we move the header forward by 4 bytes to
+ * accomodate the high bits. We will move it back after
+ * encryption.
+ */
+ if ((x->props.flags & XFRM_STATE_ESN)) {
+ esph = (void *)(skb_transport_header(skb) - sizeof(__be32));
+ *seqhi = esph->spi;
+ esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+ }
+
+ esph->spi = x->id.spi;
+
+ return esph;
+}
+
static void esp_output_done_esn(struct crypto_async_request *base, int err)
{
struct sk_buff *skb = base->data;
@@ -146,14 +198,31 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err)
esp_output_done(base, err);
}
+static void esp_output_fill_trailer(u8 *tail, int tfclen, int plen, __u8 proto)
+{
+ /* Fill padding... */
+ if (tfclen) {
+ memset(tail, 0, tfclen);
+ tail += tfclen;
+ }
+ do {
+ int i;
+ for (i = 0; i < plen - 2; i++)
+ tail[i] = i + 1;
+ } while (0);
+ tail[plen - 2] = plen - 2;
+ tail[plen - 1] = proto;
+}
+
static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
{
int err;
struct ip_esp_hdr *esph;
struct crypto_aead *aead;
struct aead_request *req;
- struct scatterlist *sg;
+ struct scatterlist *sg, *dsg;
struct sk_buff *trailer;
+ struct page *page;
void *tmp;
int blksize;
int clen;
@@ -164,10 +233,13 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
int nfrags;
int assoclen;
int seqhilen;
+ int tailen;
u8 *iv;
u8 *tail;
+ u8 *vaddr;
__be32 *seqhi;
__be64 seqno;
+ __u8 proto = *skb_mac_header(skb);
/* skb is pure payload to encrypt */
aead = x->data;
@@ -186,11 +258,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
blksize = ALIGN(crypto_aead_blocksize(aead), 4);
clen = ALIGN(skb->len + 2 + tfclen, blksize);
plen = clen - skb->len - tfclen;
-
- err = skb_cow_data(skb, tfclen + plen + alen, &trailer);
- if (err < 0)
- goto error;
- nfrags = err;
+ tailen = tfclen + plen + alen;
assoclen = sizeof(*esph);
seqhilen = 0;
@@ -200,59 +268,152 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
assoclen += seqhilen;
}
- tmp = esp_alloc_tmp(aead, nfrags, seqhilen);
- if (!tmp) {
- err = -ENOMEM;
- goto error;
+ *skb_mac_header(skb) = IPPROTO_ESP;
+ esph = ip_esp_hdr(skb);
+
+ if (!skb_cloned(skb)) {
+ if (tailen <= skb_availroom(skb)) {
+ nfrags = 1;
+ trailer = skb;
+ tail = skb_tail_pointer(trailer);
+
+ goto skip_cow;
+ } else if ((skb_shinfo(skb)->nr_frags < MAX_SKB_FRAGS)
+ && !skb_has_frag_list(skb)) {
+ int allocsize;
+ struct sock *sk = skb->sk;
+ struct page_frag *pfrag = &x->xfrag;
+
+ allocsize = ALIGN(tailen, L1_CACHE_BYTES);
+
+ spin_lock_bh(&x->lock);
+
+ if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) {
+ spin_unlock_bh(&x->lock);
+ goto cow;
+ }
+
+ page = pfrag->page;
+ get_page(page);
+
+ vaddr = kmap_atomic(page);
+
+ tail = vaddr + pfrag->offset;
+
+ esp_output_fill_trailer(tail, tfclen, plen, proto);
+
+ kunmap_atomic(vaddr);
+
+ nfrags = skb_shinfo(skb)->nr_frags;
+
+ __skb_fill_page_desc(skb, nfrags, page, pfrag->offset,
+ tailen);
+ skb_shinfo(skb)->nr_frags = ++nfrags;
+
+ pfrag->offset = pfrag->offset + allocsize;
+ nfrags++;
+
+ skb->len += tailen;
+ skb->data_len += tailen;
+ skb->truesize += tailen;
+ if (sk)
+ atomic_add(tailen, &sk->sk_wmem_alloc);
+
+ skb_push(skb, -skb_network_offset(skb));
+
+ esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
+ esph->spi = x->id.spi;
+
+ tmp = esp_alloc_tmp(aead, nfrags + 2, seqhilen);
+ if (!tmp) {
+ spin_unlock_bh(&x->lock);
+ err = -ENOMEM;
+ goto error;
+ }
+ seqhi = esp_tmp_seqhi(tmp);
+ iv = esp_tmp_iv(aead, tmp, seqhilen);
+ req = esp_tmp_req(aead, iv);
+ sg = esp_req_sg(aead, req);
+ dsg = &sg[nfrags];
+
+ esph = esp_output_set_esn(skb, esph, seqhi);
+
+ sg_init_table(sg, nfrags);
+ skb_to_sgvec(skb, sg,
+ (unsigned char *)esph - skb->data,
+ assoclen + ivlen + clen + alen);
+
+ allocsize = ALIGN(skb->data_len, L1_CACHE_BYTES);
+
+ if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) {
+ spin_unlock_bh(&x->lock);
+ err = -ENOMEM;
+ goto error;
+ }
+
+ skb_shinfo(skb)->nr_frags = 1;
+
+ page = pfrag->page;
+ get_page(page);
+ /* replace page frags in skb with new page */
+ __skb_fill_page_desc(skb, 0, page, pfrag->offset, skb->data_len);
+ pfrag->offset = pfrag->offset + allocsize;
+
+ sg_init_table(dsg, skb_shinfo(skb)->nr_frags + 1);
+ skb_to_sgvec(skb, dsg,
+ (unsigned char *)esph - skb->data,
+ assoclen + ivlen + clen + alen);
+
+ spin_unlock_bh(&x->lock);
+
+ goto skip_cow2;
+ }
}
- seqhi = esp_tmp_seqhi(tmp);
- iv = esp_tmp_iv(aead, tmp, seqhilen);
- req = esp_tmp_req(aead, iv);
- sg = esp_req_sg(aead, req);
+cow:
+ err = skb_cow_data(skb, tailen, &trailer);
+ if (err < 0)
+ goto error;
+ nfrags = err;
- /* Fill padding... */
tail = skb_tail_pointer(trailer);
- if (tfclen) {
- memset(tail, 0, tfclen);
- tail += tfclen;
- }
- do {
- int i;
- for (i = 0; i < plen - 2; i++)
- tail[i] = i + 1;
- } while (0);
- tail[plen - 2] = plen - 2;
- tail[plen - 1] = *skb_mac_header(skb);
- pskb_put(skb, trailer, clen - skb->len + alen);
+ esph = ip_esp_hdr(skb);
+skip_cow:
+ esp_output_fill_trailer(tail, tfclen, plen, proto);
+
+ pskb_put(skb, trailer, clen - skb->len + alen);
skb_push(skb, -skb_network_offset(skb));
- esph = ip_esp_hdr(skb);
- *skb_mac_header(skb) = IPPROTO_ESP;
esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
+ esph->spi = x->id.spi;
- aead_request_set_callback(req, 0, esp_output_done, skb);
-
- /* For ESN we move the header forward by 4 bytes to
- * accomodate the high bits. We will move it back after
- * encryption.
- */
- if ((x->props.flags & XFRM_STATE_ESN)) {
- esph = (void *)(skb_transport_header(skb) - sizeof(__be32));
- *seqhi = esph->spi;
- esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
- aead_request_set_callback(req, 0, esp_output_done_esn, skb);
+ tmp = esp_alloc_tmp(aead, nfrags, seqhilen);
+ if (!tmp) {
+ err = -ENOMEM;
+ goto error;
}
- esph->spi = x->id.spi;
+ seqhi = esp_tmp_seqhi(tmp);
+ iv = esp_tmp_iv(aead, tmp, seqhilen);
+ req = esp_tmp_req(aead, iv);
+ sg = esp_req_sg(aead, req);
+ dsg = sg;
+
+ esph = esp_output_set_esn(skb, esph, seqhi);
sg_init_table(sg, nfrags);
skb_to_sgvec(skb, sg,
(unsigned char *)esph - skb->data,
assoclen + ivlen + clen + alen);
- aead_request_set_crypt(req, sg, sg, ivlen + clen, iv);
+skip_cow2:
+ if ((x->props.flags & XFRM_STATE_ESN))
+ aead_request_set_callback(req, 0, esp_output_done_esn, skb);
+ else
+ aead_request_set_callback(req, 0, esp_output_done, skb);
+
+ aead_request_set_crypt(req, sg, dsg, ivlen + clen, iv);
aead_request_set_ad(req, assoclen);
seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low +
@@ -278,6 +439,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
esp_output_restore_header(skb);
}
+ if (sg != dsg)
+ esp_ssg_unref(x, tmp);
kfree(tmp);
error:
@@ -343,6 +506,23 @@ static void esp_input_restore_header(struct sk_buff *skb)
__skb_pull(skb, 4);
}
+static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi)
+{
+ struct xfrm_state *x = xfrm_input_state(skb);
+ struct ip_esp_hdr *esph = (struct ip_esp_hdr *)skb->data;
+
+ /* For ESN we move the header forward by 4 bytes to
+ * accomodate the high bits. We will move it back after
+ * decryption.
+ */
+ if ((x->props.flags & XFRM_STATE_ESN)) {
+ esph = (void *)skb_push(skb, 4);
+ *seqhi = esph->spi;
+ esph->spi = esph->seq_no;
+ esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi;
+ }
+}
+
static void esp_input_done_esn(struct crypto_async_request *base, int err)
{
struct sk_buff *skb = base->data;
@@ -378,14 +558,6 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
goto out;
}
- nfrags = skb_cow_data(skb, 0, &trailer);
- if (nfrags < 0) {
- ret = -EINVAL;
- goto out;
- }
-
- ret = -ENOMEM;
-
assoclen = sizeof(*esph);
seqhilen = 0;
@@ -394,6 +566,27 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
assoclen += seqhilen;
}
+ if (!skb_cloned(skb)) {
+ if (!skb_is_nonlinear(skb)) {
+ nfrags = 1;
+
+ goto skip_cow;
+ } else if (!skb_has_frag_list(skb)) {
+ nfrags = skb_shinfo(skb)->nr_frags;
+ nfrags++;
+
+ goto skip_cow;
+ }
+ }
+
+ nfrags = skb_cow_data(skb, 0, &trailer);
+ if (nfrags < 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+skip_cow:
+ ret = -ENOMEM;
tmp = esp_alloc_tmp(aead, nfrags, seqhilen);
if (!tmp)
goto out;
@@ -404,26 +597,17 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
req = esp_tmp_req(aead, iv);
sg = esp_req_sg(aead, req);
- skb->ip_summed = CHECKSUM_NONE;
+ esp_input_set_header(skb, seqhi);
- esph = (struct ip_esp_hdr *)skb->data;
+ sg_init_table(sg, nfrags);
+ skb_to_sgvec(skb, sg, 0, skb->len);
- aead_request_set_callback(req, 0, esp_input_done, skb);
+ skb->ip_summed = CHECKSUM_NONE;
- /* For ESN we move the header forward by 4 bytes to
- * accomodate the high bits. We will move it back after
- * decryption.
- */
- if ((x->props.flags & XFRM_STATE_ESN)) {
- esph = (void *)skb_push(skb, 4);
- *seqhi = esph->spi;
- esph->spi = esph->seq_no;
- esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi;
+ if ((x->props.flags & XFRM_STATE_ESN))
aead_request_set_callback(req, 0, esp_input_done_esn, skb);
- }
-
- sg_init_table(sg, nfrags);
- skb_to_sgvec(skb, sg, 0, skb->len);
+ else
+ aead_request_set_callback(req, 0, esp_input_done, skb);
aead_request_set_crypt(req, sg, sg, elen + ivlen, iv);
aead_request_set_ad(req, assoclen);
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 6e3f0254d8a1..3213fe8027be 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -21,6 +21,9 @@ static struct kmem_cache *secpath_cachep __read_mostly;
static DEFINE_SPINLOCK(xfrm_input_afinfo_lock);
static struct xfrm_input_afinfo __rcu *xfrm_input_afinfo[NPROTO];
+static struct gro_cells gro_cells;
+static struct net_device xfrm_napi_dev;
+
int xfrm_input_register_afinfo(struct xfrm_input_afinfo *afinfo)
{
int err = 0;
@@ -371,7 +374,7 @@ resume:
if (decaps) {
skb_dst_drop(skb);
- netif_rx(skb);
+ gro_cells_receive(&gro_cells, skb);
return 0;
} else {
return x->inner_mode->afinfo->transport_finish(skb, async);
@@ -394,6 +397,13 @@ EXPORT_SYMBOL(xfrm_input_resume);
void __init xfrm_input_init(void)
{
+ int err;
+
+ init_dummy_netdev(&xfrm_napi_dev);
+ err = gro_cells_init(&gro_cells, &xfrm_napi_dev);
+ if (err)
+ gro_cells.cells = NULL;
+
secpath_cachep = kmem_cache_create("secpath_cache",
sizeof(struct sec_path),
0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 637387bbaaea..8ba29fe58352 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -246,10 +246,8 @@ void xfrm_local_error(struct sk_buff *skb, int mtu)
return;
afinfo = xfrm_state_get_afinfo(proto);
- if (!afinfo)
- return;
-
- afinfo->local_error(skb, mtu);
- xfrm_state_put_afinfo(afinfo);
+ if (afinfo)
+ afinfo->local_error(skb, mtu);
+ rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(xfrm_local_error);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 177e208e8ff5..99ad1af2927f 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -330,7 +330,7 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)
}
EXPORT_SYMBOL(xfrm_policy_destroy);
-/* Rule must be locked. Release descentant resources, announce
+/* Rule must be locked. Release descendant resources, announce
* entry dead. The rule must be unlinked from lists to the moment.
*/
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 64e3c82eedf6..5a597dbbe564 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -192,7 +192,7 @@ int xfrm_register_type(const struct xfrm_type *type, unsigned short family)
else
err = -EEXIST;
spin_unlock_bh(&xfrm_type_lock);
- xfrm_state_put_afinfo(afinfo);
+ rcu_read_unlock();
return err;
}
EXPORT_SYMBOL(xfrm_register_type);
@@ -213,7 +213,7 @@ int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family)
else
typemap[type->proto] = NULL;
spin_unlock_bh(&xfrm_type_lock);
- xfrm_state_put_afinfo(afinfo);
+ rcu_read_unlock();
return err;
}
EXPORT_SYMBOL(xfrm_unregister_type);
@@ -231,17 +231,18 @@ retry:
return NULL;
typemap = afinfo->type_map;
- type = typemap[proto];
+ type = READ_ONCE(typemap[proto]);
if (unlikely(type && !try_module_get(type->owner)))
type = NULL;
+
+ rcu_read_unlock();
+
if (!type && !modload_attempted) {
- xfrm_state_put_afinfo(afinfo);
request_module("xfrm-type-%d-%d", family, proto);
modload_attempted = 1;
goto retry;
}
- xfrm_state_put_afinfo(afinfo);
return type;
}
@@ -280,7 +281,7 @@ int xfrm_register_mode(struct xfrm_mode *mode, int family)
out:
spin_unlock_bh(&xfrm_mode_lock);
- xfrm_state_put_afinfo(afinfo);
+ rcu_read_unlock();
return err;
}
EXPORT_SYMBOL(xfrm_register_mode);
@@ -308,7 +309,7 @@ int xfrm_unregister_mode(struct xfrm_mode *mode, int family)
}
spin_unlock_bh(&xfrm_mode_lock);
- xfrm_state_put_afinfo(afinfo);
+ rcu_read_unlock();
return err;
}
EXPORT_SYMBOL(xfrm_unregister_mode);
@@ -327,17 +328,17 @@ retry:
if (unlikely(afinfo == NULL))
return NULL;
- mode = afinfo->mode_map[encap];
+ mode = READ_ONCE(afinfo->mode_map[encap]);
if (unlikely(mode && !try_module_get(mode->owner)))
mode = NULL;
+
+ rcu_read_unlock();
if (!mode && !modload_attempted) {
- xfrm_state_put_afinfo(afinfo);
request_module("xfrm-mode-%d-%d", family, encap);
modload_attempted = 1;
goto retry;
}
- xfrm_state_put_afinfo(afinfo);
return mode;
}
@@ -409,7 +410,7 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
if (x->xflags & XFRM_SOFT_EXPIRE) {
/* enter hard expire without soft expire first?!
* setting a new date could trigger this.
- * workarbound: fix x->curflt.add_time by below:
+ * workaround: fix x->curflt.add_time by below:
*/
x->curlft.add_time = now - x->saved_tmo - 1;
tmo = x->lft.hard_add_expires_seconds - x->saved_tmo;
@@ -639,26 +640,25 @@ void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si)
}
EXPORT_SYMBOL(xfrm_sad_getinfo);
-static int
+static void
xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl,
const struct xfrm_tmpl *tmpl,
const xfrm_address_t *daddr, const xfrm_address_t *saddr,
unsigned short family)
{
- struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
+ struct xfrm_state_afinfo *afinfo = xfrm_state_afinfo_get_rcu(family);
+
if (!afinfo)
- return -1;
+ return;
+
afinfo->init_tempsel(&x->sel, fl);
if (family != tmpl->encap_family) {
- xfrm_state_put_afinfo(afinfo);
- afinfo = xfrm_state_get_afinfo(tmpl->encap_family);
+ afinfo = xfrm_state_afinfo_get_rcu(tmpl->encap_family);
if (!afinfo)
- return -1;
+ return;
}
afinfo->init_temprop(x, tmpl, daddr, saddr);
- xfrm_state_put_afinfo(afinfo);
- return 0;
}
static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
@@ -1474,7 +1474,7 @@ xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
if (afinfo->tmpl_sort)
err = afinfo->tmpl_sort(dst, src, n);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
- xfrm_state_put_afinfo(afinfo);
+ rcu_read_unlock();
return err;
}
EXPORT_SYMBOL(xfrm_tmpl_sort);
@@ -1494,7 +1494,7 @@ xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
if (afinfo->state_sort)
err = afinfo->state_sort(dst, src, n);
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
- xfrm_state_put_afinfo(afinfo);
+ rcu_read_unlock();
return err;
}
EXPORT_SYMBOL(xfrm_state_sort);
@@ -1932,10 +1932,10 @@ EXPORT_SYMBOL(xfrm_unregister_km);
int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
{
int err = 0;
- if (unlikely(afinfo == NULL))
- return -EINVAL;
- if (unlikely(afinfo->family >= NPROTO))
+
+ if (WARN_ON(afinfo->family >= NPROTO))
return -EAFNOSUPPORT;
+
spin_lock_bh(&xfrm_state_afinfo_lock);
if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
err = -EEXIST;
@@ -1948,14 +1948,14 @@ EXPORT_SYMBOL(xfrm_state_register_afinfo);
int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
{
- int err = 0;
- if (unlikely(afinfo == NULL))
- return -EINVAL;
- if (unlikely(afinfo->family >= NPROTO))
+ int err = 0, family = afinfo->family;
+
+ if (WARN_ON(family >= NPROTO))
return -EAFNOSUPPORT;
+
spin_lock_bh(&xfrm_state_afinfo_lock);
if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
- if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
+ if (rcu_access_pointer(xfrm_state_afinfo[family]) != afinfo)
err = -EINVAL;
else
RCU_INIT_POINTER(xfrm_state_afinfo[afinfo->family], NULL);
@@ -1966,6 +1966,14 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
}
EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
+struct xfrm_state_afinfo *xfrm_state_afinfo_get_rcu(unsigned int family)
+{
+ if (unlikely(family >= NPROTO))
+ return NULL;
+
+ return rcu_dereference(xfrm_state_afinfo[family]);
+}
+
struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
{
struct xfrm_state_afinfo *afinfo;
@@ -1978,11 +1986,6 @@ struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
return afinfo;
}
-void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
-{
- rcu_read_unlock();
-}
-
/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
void xfrm_state_delete_tunnel(struct xfrm_state *x)
{
@@ -2000,16 +2003,13 @@ EXPORT_SYMBOL(xfrm_state_delete_tunnel);
int xfrm_state_mtu(struct xfrm_state *x, int mtu)
{
- int res;
+ const struct xfrm_type *type = READ_ONCE(x->type);
- spin_lock_bh(&x->lock);
if (x->km.state == XFRM_STATE_VALID &&
- x->type && x->type->get_mtu)
- res = x->type->get_mtu(x, mtu);
- else
- res = mtu - x->props.header_len;
- spin_unlock_bh(&x->lock);
- return res;
+ type && type->get_mtu)
+ return type->get_mtu(x, mtu);
+
+ return mtu - x->props.header_len;
}
int __xfrm_init_state(struct xfrm_state *x, bool init_replay)
@@ -2028,7 +2028,7 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay)
if (afinfo->init_flags)
err = afinfo->init_flags(x);
- xfrm_state_put_afinfo(afinfo);
+ rcu_read_unlock();
if (err)
goto error;