aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
authorWillem de Bruijn <willemb@google.com>2017-08-03 16:29:39 -0400
committerDavid S. Miller <davem@davemloft.net>2017-08-03 21:37:29 -0700
commit52267790ef52d7513879238ca9fac22c1733e0e3 (patch)
tree3df2cd31743717fbf4335b950cc52328f2f44f14 /include/linux
parentsock: skb_copy_ubufs support for compound pages (diff)
downloadlinux-dev-52267790ef52d7513879238ca9fac22c1733e0e3.tar.xz
linux-dev-52267790ef52d7513879238ca9fac22c1733e0e3.zip
sock: add MSG_ZEROCOPY
The kernel supports zerocopy sendmsg in virtio and tap. Expand the infrastructure to support other socket types. Introduce a completion notification channel over the socket error queue. Notifications are returned with ee_origin SO_EE_ORIGIN_ZEROCOPY. ee_errno is 0 to avoid blocking the send/recv path on receiving notifications. Add reference counting, to support the skb split, merge, resize and clone operations possible with SOCK_STREAM and other socket types. The patch does not yet modify any datapaths. Signed-off-by: Willem de Bruijn <willemb@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/skbuff.h60
-rw-r--r--include/linux/socket.h1
2 files changed, 61 insertions, 0 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2f64e2bbb592..59cff7aa494e 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -429,6 +429,7 @@ enum {
SKBTX_SCHED_TSTAMP = 1 << 6,
};
+#define SKBTX_ZEROCOPY_FRAG (SKBTX_DEV_ZEROCOPY | SKBTX_SHARED_FRAG)
#define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | \
SKBTX_SCHED_TSTAMP)
#define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP)
@@ -445,8 +446,28 @@ struct ubuf_info {
void (*callback)(struct ubuf_info *, bool zerocopy_success);
void *ctx;
unsigned long desc;
+ u16 zerocopy:1;
+ atomic_t refcnt;
};
+#define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg))
+
+struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size);
+
+static inline void sock_zerocopy_get(struct ubuf_info *uarg)
+{
+ atomic_inc(&uarg->refcnt);
+}
+
+void sock_zerocopy_put(struct ubuf_info *uarg);
+void sock_zerocopy_put_abort(struct ubuf_info *uarg);
+
+void sock_zerocopy_callback(struct ubuf_info *uarg, bool success);
+
+int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
+ struct msghdr *msg, int len,
+ struct ubuf_info *uarg);
+
/* This data is invariant across clones and lives at
* the end of the header data, ie. at skb->end.
*/
@@ -1214,6 +1235,45 @@ static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb)
return &skb_shinfo(skb)->hwtstamps;
}
+static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb)
+{
+ bool is_zcopy = skb && skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY;
+
+ return is_zcopy ? skb_uarg(skb) : NULL;
+}
+
+static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg)
+{
+ if (skb && uarg && !skb_zcopy(skb)) {
+ sock_zerocopy_get(uarg);
+ skb_shinfo(skb)->destructor_arg = uarg;
+ skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG;
+ }
+}
+
+/* Release a reference on a zerocopy structure */
+static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy)
+{
+ struct ubuf_info *uarg = skb_zcopy(skb);
+
+ if (uarg) {
+ uarg->zerocopy = uarg->zerocopy && zerocopy;
+ sock_zerocopy_put(uarg);
+ skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG;
+ }
+}
+
+/* Abort a zerocopy operation and revert zckey on error in send syscall */
+static inline void skb_zcopy_abort(struct sk_buff *skb)
+{
+ struct ubuf_info *uarg = skb_zcopy(skb);
+
+ if (uarg) {
+ sock_zerocopy_put_abort(uarg);
+ skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG;
+ }
+}
+
/**
* skb_queue_empty - check if a queue is empty
* @list: queue head
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 8b13db5163cc..8ad963cdc88c 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -287,6 +287,7 @@ struct ucred {
#define MSG_BATCH 0x40000 /* sendmmsg(): more messages coming */
#define MSG_EOF MSG_FIN
+#define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */
#define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */
#define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exec for file
descriptor received through