aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/skbuff.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/skbuff.h')
-rw-r--r--include/linux/skbuff.h244
1 files changed, 196 insertions, 48 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index dbe29b6c9bd6..72299ef00061 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -22,6 +22,7 @@
#include <linux/cache.h>
#include <linux/rbtree.h>
#include <linux/socket.h>
+#include <linux/refcount.h>
#include <linux/atomic.h>
#include <asm/types.h>
@@ -345,6 +346,42 @@ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta)
frag->size -= delta;
}
+static inline bool skb_frag_must_loop(struct page *p)
+{
+#if defined(CONFIG_HIGHMEM)
+ if (PageHighMem(p))
+ return true;
+#endif
+ return false;
+}
+
+/**
+ * skb_frag_foreach_page - loop over pages in a fragment
+ *
+ * @f: skb frag to operate on
+ * @f_off: offset from start of f->page.p
+ * @f_len: length from f_off to loop over
+ * @p: (temp var) current page
+ * @p_off: (temp var) offset from start of current page,
+ * non-zero only on first page.
+ * @p_len: (temp var) length in current page,
+ * < PAGE_SIZE only on first and last page.
+ * @copied: (temp var) length so far, excluding current p_len.
+ *
+ * A fragment can hold a compound page, in which case per-page
+ * operations, notably kmap_atomic, must be called for each
+ * regular page.
+ */
+#define skb_frag_foreach_page(f, f_off, f_len, p, p_off, p_len, copied) \
+ for (p = skb_frag_page(f) + ((f_off) >> PAGE_SHIFT), \
+ p_off = (f_off) & (PAGE_SIZE - 1), \
+ p_len = skb_frag_must_loop(p) ? \
+ min_t(u32, f_len, PAGE_SIZE - p_off) : f_len, \
+ copied = 0; \
+ copied < f_len; \
+ copied += p_len, p++, p_off = 0, \
+ p_len = min_t(u32, f_len - copied, PAGE_SIZE)) \
+
#define HAVE_HW_TIME_STAMP
/**
@@ -393,6 +430,7 @@ enum {
SKBTX_SCHED_TSTAMP = 1 << 6,
};
+#define SKBTX_ZEROCOPY_FRAG (SKBTX_DEV_ZEROCOPY | SKBTX_SHARED_FRAG)
#define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | \
SKBTX_SCHED_TSTAMP)
#define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP)
@@ -407,10 +445,46 @@ enum {
*/
struct ubuf_info {
void (*callback)(struct ubuf_info *, bool zerocopy_success);
- void *ctx;
- unsigned long desc;
+ union {
+ struct {
+ unsigned long desc;
+ void *ctx;
+ };
+ struct {
+ u32 id;
+ u16 len;
+ u16 zerocopy:1;
+ u32 bytelen;
+ };
+ };
+ refcount_t refcnt;
+
+ struct mmpin {
+ struct user_struct *user;
+ unsigned int num_pg;
+ } mmp;
};
+#define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg))
+
+struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size);
+struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size,
+ struct ubuf_info *uarg);
+
+static inline void sock_zerocopy_get(struct ubuf_info *uarg)
+{
+ refcount_inc(&uarg->refcnt);
+}
+
+void sock_zerocopy_put(struct ubuf_info *uarg);
+void sock_zerocopy_put_abort(struct ubuf_info *uarg);
+
+void sock_zerocopy_callback(struct ubuf_info *uarg, bool success);
+
+int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
+ struct msghdr *msg, int len,
+ struct ubuf_info *uarg);
+
/* This data is invariant across clones and lives at
* the end of the header data, ie. at skb->end.
*/
@@ -463,39 +537,38 @@ enum {
enum {
SKB_GSO_TCPV4 = 1 << 0,
- SKB_GSO_UDP = 1 << 1,
/* This indicates the skb is from an untrusted source. */
- SKB_GSO_DODGY = 1 << 2,
+ SKB_GSO_DODGY = 1 << 1,
/* This indicates the tcp segment has CWR set. */
- SKB_GSO_TCP_ECN = 1 << 3,
+ SKB_GSO_TCP_ECN = 1 << 2,
- SKB_GSO_TCP_FIXEDID = 1 << 4,
+ SKB_GSO_TCP_FIXEDID = 1 << 3,
- SKB_GSO_TCPV6 = 1 << 5,
+ SKB_GSO_TCPV6 = 1 << 4,
- SKB_GSO_FCOE = 1 << 6,
+ SKB_GSO_FCOE = 1 << 5,
- SKB_GSO_GRE = 1 << 7,
+ SKB_GSO_GRE = 1 << 6,
- SKB_GSO_GRE_CSUM = 1 << 8,
+ SKB_GSO_GRE_CSUM = 1 << 7,
- SKB_GSO_IPXIP4 = 1 << 9,
+ SKB_GSO_IPXIP4 = 1 << 8,
- SKB_GSO_IPXIP6 = 1 << 10,
+ SKB_GSO_IPXIP6 = 1 << 9,
- SKB_GSO_UDP_TUNNEL = 1 << 11,
+ SKB_GSO_UDP_TUNNEL = 1 << 10,
- SKB_GSO_UDP_TUNNEL_CSUM = 1 << 12,
+ SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11,
- SKB_GSO_PARTIAL = 1 << 13,
+ SKB_GSO_PARTIAL = 1 << 12,
- SKB_GSO_TUNNEL_REMCSUM = 1 << 14,
+ SKB_GSO_TUNNEL_REMCSUM = 1 << 13,
- SKB_GSO_SCTP = 1 << 15,
+ SKB_GSO_SCTP = 1 << 14,
- SKB_GSO_ESP = 1 << 16,
+ SKB_GSO_ESP = 1 << 15,
};
#if BITS_PER_LONG > 32
@@ -885,7 +958,7 @@ void kfree_skb(struct sk_buff *skb);
void kfree_skb_list(struct sk_buff *segs);
void skb_tx_error(struct sk_buff *skb);
void consume_skb(struct sk_buff *skb);
-void consume_stateless_skb(struct sk_buff *skb);
+void __consume_stateless_skb(struct sk_buff *skb);
void __kfree_skb(struct sk_buff *skb);
extern struct kmem_cache *skbuff_head_cache;
@@ -945,12 +1018,6 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE);
}
-struct sk_buff *__alloc_skb_head(gfp_t priority, int node);
-static inline struct sk_buff *alloc_skb_head(gfp_t priority)
-{
- return __alloc_skb_head(priority, -1);
-}
-
struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src);
int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask);
struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority);
@@ -973,7 +1040,23 @@ int __must_check skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg
int __must_check skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg,
int offset, int len);
int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer);
-int skb_pad(struct sk_buff *skb, int pad);
+int __skb_pad(struct sk_buff *skb, int pad, bool free_on_error);
+
+/**
+ * skb_pad - zero pad the tail of an skb
+ * @skb: buffer to pad
+ * @pad: space to pad
+ *
+ * Ensure that a buffer is followed by a padding area that is zero
+ * filled. Used by network drivers which may DMA or transfer data
+ * beyond the buffer end onto the wire.
+ *
+ * May return error in out of memory cases. The skb is freed on error.
+ */
+static inline int skb_pad(struct sk_buff *skb, int pad)
+{
+ return __skb_pad(skb, pad, true);
+}
#define dev_kfree_skb(a) consume_skb(a)
int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
@@ -1129,8 +1212,6 @@ static inline __u32 skb_get_hash(struct sk_buff *skb)
return skb->hash;
}
-__u32 __skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6);
-
static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6)
{
if (!skb->l4_hash && !skb->sw_hash) {
@@ -1143,20 +1224,6 @@ static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6
return skb->hash;
}
-__u32 __skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl);
-
-static inline __u32 skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl4)
-{
- if (!skb->l4_hash && !skb->sw_hash) {
- struct flow_keys keys;
- __u32 hash = __get_hash_from_flowi4(fl4, &keys);
-
- __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys));
- }
-
- return skb->hash;
-}
-
__u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb);
static inline __u32 skb_get_hash_raw(const struct sk_buff *skb)
@@ -1201,6 +1268,50 @@ static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb)
return &skb_shinfo(skb)->hwtstamps;
}
+static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb)
+{
+ bool is_zcopy = skb && skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY;
+
+ return is_zcopy ? skb_uarg(skb) : NULL;
+}
+
+static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg)
+{
+ if (skb && uarg && !skb_zcopy(skb)) {
+ sock_zerocopy_get(uarg);
+ skb_shinfo(skb)->destructor_arg = uarg;
+ skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG;
+ }
+}
+
+/* Release a reference on a zerocopy structure */
+static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy)
+{
+ struct ubuf_info *uarg = skb_zcopy(skb);
+
+ if (uarg) {
+ if (uarg->callback == sock_zerocopy_callback) {
+ uarg->zerocopy = uarg->zerocopy && zerocopy;
+ sock_zerocopy_put(uarg);
+ } else {
+ uarg->callback(uarg, zerocopy);
+ }
+
+ skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG;
+ }
+}
+
+/* Abort a zerocopy operation and revert zckey on error in send syscall */
+static inline void skb_zcopy_abort(struct sk_buff *skb)
+{
+ struct ubuf_info *uarg = skb_zcopy(skb);
+
+ if (uarg) {
+ sock_zerocopy_put_abort(uarg);
+ skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG;
+ }
+}
+
/**
* skb_queue_empty - check if a queue is empty
* @list: queue head
@@ -1783,13 +1894,18 @@ static inline unsigned int skb_headlen(const struct sk_buff *skb)
return skb->len - skb->data_len;
}
-static inline unsigned int skb_pagelen(const struct sk_buff *skb)
+static inline unsigned int __skb_pagelen(const struct sk_buff *skb)
{
unsigned int i, len = 0;
for (i = skb_shinfo(skb)->nr_frags - 1; (int)i >= 0; i--)
len += skb_frag_size(&skb_shinfo(skb)->frags[i]);
- return len + skb_headlen(skb);
+ return len;
+}
+
+static inline unsigned int skb_pagelen(const struct sk_buff *skb)
+{
+ return skb_headlen(skb) + __skb_pagelen(skb);
}
/**
@@ -2434,7 +2550,17 @@ static inline void skb_orphan(struct sk_buff *skb)
*/
static inline int skb_orphan_frags(struct sk_buff *skb, gfp_t gfp_mask)
{
- if (likely(!(skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY)))
+ if (likely(!skb_zcopy(skb)))
+ return 0;
+ if (skb_uarg(skb)->callback == sock_zerocopy_callback)
+ return 0;
+ return skb_copy_ubufs(skb, gfp_mask);
+}
+
+/* Frags must be orphaned, even if refcounted, if skb might loop to rx path */
+static inline int skb_orphan_frags_rx(struct sk_buff *skb, gfp_t gfp_mask)
+{
+ if (likely(!skb_zcopy(skb)))
return 0;
return skb_copy_ubufs(skb, gfp_mask);
}
@@ -2825,25 +2951,42 @@ static inline int skb_padto(struct sk_buff *skb, unsigned int len)
* skb_put_padto - increase size and pad an skbuff up to a minimal size
* @skb: buffer to pad
* @len: minimal length
+ * @free_on_error: free buffer on error
*
* Pads up a buffer to ensure the trailing bytes exist and are
* blanked. If the buffer already contains sufficient data it
* is untouched. Otherwise it is extended. Returns zero on
- * success. The skb is freed on error.
+ * success. The skb is freed on error if @free_on_error is true.
*/
-static inline int skb_put_padto(struct sk_buff *skb, unsigned int len)
+static inline int __skb_put_padto(struct sk_buff *skb, unsigned int len,
+ bool free_on_error)
{
unsigned int size = skb->len;
if (unlikely(size < len)) {
len -= size;
- if (skb_pad(skb, len))
+ if (__skb_pad(skb, len, free_on_error))
return -ENOMEM;
__skb_put(skb, len);
}
return 0;
}
+/**
+ * skb_put_padto - increase size and pad an skbuff up to a minimal size
+ * @skb: buffer to pad
+ * @len: minimal length
+ *
+ * Pads up a buffer to ensure the trailing bytes exist and are
+ * blanked. If the buffer already contains sufficient data it
+ * is untouched. Otherwise it is extended. Returns zero on
+ * success. The skb is freed on error.
+ */
+static inline int skb_put_padto(struct sk_buff *skb, unsigned int len)
+{
+ return __skb_put_padto(skb, len, true);
+}
+
static inline int skb_add_data(struct sk_buff *skb,
struct iov_iter *from, int copy)
{
@@ -2866,6 +3009,8 @@ static inline int skb_add_data(struct sk_buff *skb,
static inline bool skb_can_coalesce(struct sk_buff *skb, int i,
const struct page *page, int off)
{
+ if (skb_zcopy(skb))
+ return false;
if (i) {
const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1];
@@ -3120,6 +3265,9 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to,
int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
struct pipe_inode_info *pipe, unsigned int len,
unsigned int flags);
+int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset,
+ int len);
+int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len);
void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
unsigned int skb_zerocopy_headlen(const struct sk_buff *from);
int skb_zerocopy(struct sk_buff *to, struct sk_buff *from,