Diffstat (limited to 'drivers/net')
 drivers/net/xen-netback/common.h    |  39
 drivers/net/xen-netback/interface.c |  65
 drivers/net/xen-netback/netback.c   | 432
 3 files changed, 371 insertions(+), 165 deletions(-)
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 8f264df8818a..5a991266a394 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -79,6 +79,17 @@ struct pending_tx_info {
* if it is head of one or more tx
* reqs
*/
+ /* Callback data for released SKBs. The callback is always
+  * xenvif_zerocopy_callback, and desc contains the pending_idx, which
+  * is also an index into the pending_tx_info array. It is initialized
+  * in xenvif_alloc and never changes.
+  * skb_shinfo(skb)->destructor_arg points to the first mapped slot's
+  * callback_struct in this array of struct pending_tx_info's; each
+  * ctx then points to the next one, or is NULL when there are no more
+  * slots for this skb.
+  * ubuf_to_vif is a helper which finds the struct xenvif from a
+  * pointer to this field.
+  */
+ struct ubuf_info callback_struct;
};
#define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
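To make that chaining concrete, here is a minimal userspace sketch of the list the comment describes. struct ubuf_info_model and walk_chain are illustrative stand-ins, not the kernel's types (the real struct ubuf_info carries ctx as a void *); the sketch only shows how one callback invocation reaches every slot belonging to the skb:

/* Sketch: userspace model of the destructor_arg/ctx chain above. */
#include <stdio.h>
#include <stddef.h>

struct ubuf_info_model {
        struct ubuf_info_model *ctx;    /* next slot's callback struct, or NULL */
        size_t desc;                    /* pending_idx of this slot */
};

static void walk_chain(struct ubuf_info_model *ubuf)
{
        /* One callback invocation visits every slot of the skb. */
        while (ubuf) {
                printf("pending_idx %zu\n", ubuf->desc);
                ubuf = ubuf->ctx;
        }
}

int main(void)
{
        struct ubuf_info_model slots[3] = {
                { .ctx = &slots[1], .desc = 0 },
                { .ctx = &slots[2], .desc = 1 },
                { .ctx = NULL,      .desc = 2 },
        };

        walk_chain(&slots[0]);  /* what destructor_arg would point at */
        return 0;
}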
@@ -135,13 +146,31 @@ struct xenvif {
pending_ring_idx_t pending_cons;
u16 pending_ring[MAX_PENDING_REQS];
struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
+ grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
/* Coalescing tx requests before copying makes the number of grant
* copy ops greater than or equal to the number of slots required. In
* the worst case a tx request consumes 2 gnttab_copy ops.
*/
struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS];
-
+ struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
+ struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
+ /* passed to gnttab_[un]map_refs with pages under (un)mapping */
+ struct page *pages_to_map[MAX_PENDING_REQS];
+ struct page *pages_to_unmap[MAX_PENDING_REQS];
+
+ /* This prevents zerocopy callbacks from racing over the dealloc_ring */
+ spinlock_t callback_lock;
+ /* This prevents the dealloc thread and the NAPI instance from racing
+  * over response creation and pending_ring in xenvif_idx_release. In
+  * xenvif_tx_err it only protects response creation.
+  */
+ spinlock_t response_lock;
+ pending_ring_idx_t dealloc_prod;
+ pending_ring_idx_t dealloc_cons;
+ u16 dealloc_ring[MAX_PENDING_REQS];
+ struct task_struct *dealloc_task;
+ wait_queue_head_t dealloc_wq;
/* Use kthread for guest RX */
struct task_struct *task;
@@ -228,6 +257,8 @@ int xenvif_tx_action(struct xenvif *vif, int budget);
int xenvif_kthread_guest_rx(void *data);
void xenvif_kick_thread(struct xenvif *vif);
+int xenvif_dealloc_kthread(void *data);
+
/* Determine whether the needed number of slots (req) are available,
* and set req_event if not.
*/
@@ -235,6 +266,12 @@ bool xenvif_rx_ring_slots_available(struct xenvif *vif, int needed);
void xenvif_stop_queue(struct xenvif *vif);
+/* Callback from stack when TX packet can be released */
+void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success);
+
+/* Unmap a pending page and release it back to the guest */
+void xenvif_idx_unmap(struct xenvif *vif, u16 pending_idx);
+
static inline pending_ring_idx_t nr_pending_reqs(struct xenvif *vif)
{
return MAX_PENDING_REQS -
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index bc32627a22cb..1fe9fe523cc8 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -38,6 +38,7 @@
#include <xen/events.h>
#include <asm/xen/hypercall.h>
+#include <xen/balloon.h>
#define XENVIF_QUEUE_LENGTH 32
#define XENVIF_NAPI_WEIGHT 64
@@ -87,7 +88,8 @@ static int xenvif_poll(struct napi_struct *napi, int budget)
local_irq_save(flags);
RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
- if (!more_to_do)
+ if (!(more_to_do &&
+ xenvif_tx_pending_slots_available(vif)))
__napi_complete(napi);
local_irq_restore(flags);
@@ -121,7 +123,9 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
BUG_ON(skb->dev != dev);
/* Drop the packet if vif is not ready */
- if (vif->task == NULL || !xenvif_schedulable(vif))
+ if (vif->task == NULL ||
+ vif->dealloc_task == NULL ||
+ !xenvif_schedulable(vif))
goto drop;
/* At best we'll need one slot for the header and one for each
@@ -343,8 +347,26 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
vif->pending_prod = MAX_PENDING_REQS;
for (i = 0; i < MAX_PENDING_REQS; i++)
vif->pending_ring[i] = i;
- for (i = 0; i < MAX_PENDING_REQS; i++)
- vif->mmap_pages[i] = NULL;
+ spin_lock_init(&vif->callback_lock);
+ spin_lock_init(&vif->response_lock);
+ /* If ballooning is disabled, this will consume real memory, so you
+  * had better enable it. The long-term solution would be to use just
+  * a bunch of valid page descriptors, without depending on ballooning.
+  */
+ err = alloc_xenballooned_pages(MAX_PENDING_REQS,
+ vif->mmap_pages,
+ false);
+ if (err) {
+ netdev_err(dev, "Could not reserve mmap_pages\n");
+ return ERR_PTR(-ENOMEM);
+ }
+ for (i = 0; i < MAX_PENDING_REQS; i++) {
+ vif->pending_tx_info[i].callback_struct = (struct ubuf_info)
+ { .callback = xenvif_zerocopy_callback,
+ .ctx = NULL,
+ .desc = i };
+ vif->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
+ }
/*
* Initialise a dummy MAC address. We choose the numerically
@@ -382,12 +404,14 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
BUG_ON(vif->tx_irq);
BUG_ON(vif->task);
+ BUG_ON(vif->dealloc_task);
err = xenvif_map_frontend_rings(vif, tx_ring_ref, rx_ring_ref);
if (err < 0)
goto err;
init_waitqueue_head(&vif->wq);
+ init_waitqueue_head(&vif->dealloc_wq);
if (tx_evtchn == rx_evtchn) {
/* feature-split-event-channels == 0 */
@@ -431,6 +455,16 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
vif->task = task;
+ task = kthread_create(xenvif_dealloc_kthread,
+ (void *)vif, "%s-dealloc", vif->dev->name);
+ if (IS_ERR(task)) {
+ pr_warn("Could not allocate kthread for %s\n", vif->dev->name);
+ err = PTR_ERR(task);
+ goto err_rx_unbind;
+ }
+
+ vif->dealloc_task = task;
+
rtnl_lock();
if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN)
dev_set_mtu(vif->dev, ETH_DATA_LEN);
@@ -441,6 +475,7 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
rtnl_unlock();
wake_up_process(vif->task);
+ wake_up_process(vif->dealloc_task);
return 0;
@@ -478,6 +513,11 @@ void xenvif_disconnect(struct xenvif *vif)
vif->task = NULL;
}
+ if (vif->dealloc_task) {
+ kthread_stop(vif->dealloc_task);
+ vif->dealloc_task = NULL;
+ }
+
if (vif->tx_irq) {
if (vif->tx_irq == vif->rx_irq)
unbind_from_irqhandler(vif->tx_irq, vif);
@@ -493,6 +533,23 @@ void xenvif_disconnect(struct xenvif *vif)
void xenvif_free(struct xenvif *vif)
{
+ int i, unmap_timeout = 0;
+
+ for (i = 0; i < MAX_PENDING_REQS; ++i) {
+ if (vif->grant_tx_handle[i] != NETBACK_INVALID_HANDLE) {
+ unmap_timeout++;
+ schedule_timeout(msecs_to_jiffies(1000));
+ if (unmap_timeout > 9 &&
+ net_ratelimit())
+ netdev_err(vif->dev,
+ "Page still granted! Index: %x\n",
+ i);
+ i = -1;
+ }
+ }
+
+ free_xenballooned_pages(MAX_PENDING_REQS, vif->mmap_pages);
+
netif_napi_del(&vif->napi);
unregister_netdev(vif->dev);
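The wait loop added to xenvif_free above has a subtle shape: any slot still holding a grant forces a sleep and a full restart of the scan (the i = -1 resets the for loop), with a rate-limited warning once the wait has gone past roughly ten iterations. A userspace sketch of the same restart-on-busy pattern, with a fake handles[] array standing in for grant_tx_handle[]:

#include <stdio.h>
#include <unistd.h>

#define NSLOTS 4
#define INVALID_HANDLE (-1)

static int handles[NSLOTS] = { INVALID_HANDLE, 5, INVALID_HANDLE, INVALID_HANDLE };

static void wait_for_release(void)
{
        int waited = 0;
        int i;

        for (i = 0; i < NSLOTS; i++) {
                if (handles[i] != INVALID_HANDLE) {
                        waited++;
                        sleep(1);
                        if (waited > 9)
                                fprintf(stderr, "slot %d still granted\n", i);
                        handles[i] = INVALID_HANDLE; /* pretend the callback fired */
                        i = -1; /* restart the scan from slot 0 */
                }
        }
}

int main(void)
{
        wait_for_release();
        puts("all slots released");
        return 0;
}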
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index e9391badfa4a..cb29134147d1 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -101,10 +101,18 @@ static inline unsigned long idx_to_kaddr(struct xenvif *vif,
return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx));
}
+/* Find the containing VIF's structure from a pointer in the
+ * pending_tx_info array
+ */
static inline struct xenvif* ubuf_to_vif(struct ubuf_info *ubuf)
{
- return NULL;
+ u16 pending_idx = ubuf->desc;
+ struct pending_tx_info *temp =
+ container_of(ubuf, struct pending_tx_info, callback_struct);
+ return container_of(temp - pending_idx,
+ struct xenvif,
+ pending_tx_info[0]);
}
+
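The pointer arithmetic in ubuf_to_vif is worth spelling out: stepping back pending_idx elements lands on pending_tx_info[0], and container_of then subtracts that member's offset to reach the enclosing struct xenvif. A self-contained userspace analogue using offsetof, with simplified stand-in types (info_to_vif and vif_model are not driver names):

#include <assert.h>
#include <stddef.h>

#define MAX_REQS 4

struct info { int req; };

struct vif_model {
        int domid;
        struct info pending[MAX_REQS];
};

/* Recover the outer struct from a pointer to pending[idx]. */
static struct vif_model *info_to_vif(struct info *p, size_t idx)
{
        struct info *first = p - idx;   /* &pending[0] */

        return (struct vif_model *)
                ((char *)first - offsetof(struct vif_model, pending));
}

int main(void)
{
        struct vif_model v = { .domid = 7 };

        assert(info_to_vif(&v.pending[2], 2) == &v);
        return 0;
}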
/* This is a minimum size for the linear area to avoid lots of
* calls to __pskb_pull_tail() as we set up checksum offsets. The
* value 128 was chosen as it covers all IPv4 and most likely
@@ -665,9 +673,12 @@ static void xenvif_tx_err(struct xenvif *vif,
struct xen_netif_tx_request *txp, RING_IDX end)
{
RING_IDX cons = vif->tx.req_cons;
+ unsigned long flags;
do {
+ spin_lock_irqsave(&vif->response_lock, flags);
make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
+ spin_unlock_irqrestore(&vif->response_lock, flags);
if (cons == end)
break;
txp = RING_GET_REQUEST(&vif->tx, cons++);
@@ -799,10 +810,24 @@ struct xenvif_tx_cb {
#define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
-static struct gnttab_copy *xenvif_get_requests(struct xenvif *vif,
- struct sk_buff *skb,
- struct xen_netif_tx_request *txp,
- struct gnttab_copy *gop)
+static inline void xenvif_tx_create_gop(struct xenvif *vif,
+ u16 pending_idx,
+ struct xen_netif_tx_request *txp,
+ struct gnttab_map_grant_ref *gop)
+{
+ vif->pages_to_map[gop-vif->tx_map_ops] = vif->mmap_pages[pending_idx];
+ gnttab_set_map_op(gop, idx_to_kaddr(vif, pending_idx),
+ GNTMAP_host_map | GNTMAP_readonly,
+ txp->gref, vif->domid);
+
+ memcpy(&vif->pending_tx_info[pending_idx].req, txp,
+ sizeof(*txp));
+}
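Note the bookkeeping idiom here: pages_to_map[] is kept parallel to tx_map_ops[], both indexed by the op's offset (gop - vif->tx_map_ops), so the later batched gnttab_map_refs call can be handed matching arrays. A simplified sketch of that pattern (map_op, batch, and batch_add are illustrative stand-ins, not the Xen structures):

#include <assert.h>
#include <stddef.h>

#define MAX_OPS 4

struct map_op { unsigned int gref; };  /* stand-in for a grant map op */

struct batch {
        struct map_op ops[MAX_OPS];
        void *pages[MAX_OPS];           /* kept parallel to ops[] */
        size_t n;
};

static void batch_add(struct batch *b, unsigned int gref, void *page)
{
        size_t i = b->n++;              /* same index into both arrays */

        assert(i < MAX_OPS);
        b->ops[i].gref = gref;
        b->pages[i] = page;
}

int main(void)
{
        struct batch b = { .n = 0 };
        int page0, page1;

        batch_add(&b, 10, &page0);
        batch_add(&b, 11, &page1);
        assert(b.pages[1] == &page1 && b.ops[1].gref == 11);
        return 0;
}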
+
+static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif *vif,
+ struct sk_buff *skb,
+ struct xen_netif_tx_request *txp,
+ struct gnttab_map_grant_ref *gop)
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
skb_frag_t *frags = shinfo->frags;
@@ -823,83 +848,12 @@ static struct gnttab_copy *xenvif_get_requests(struct xenvif *vif,
/* Skip first skb fragment if it is on same page as header fragment. */
start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
- /* Coalesce tx requests, at this point the packet passed in
- * should be <= 64K. Any packets larger than 64K have been
- * handled in xenvif_count_requests().
- */
- for (shinfo->nr_frags = slot = start; slot < nr_slots;
- shinfo->nr_frags++) {
- struct pending_tx_info *pending_tx_info =
- vif->pending_tx_info;
-
- page = alloc_page(GFP_ATOMIC|__GFP_COLD);
- if (!page)
- goto err;
-
- dst_offset = 0;
- first = NULL;
- while (dst_offset < PAGE_SIZE && slot < nr_slots) {
- gop->flags = GNTCOPY_source_gref;
-
- gop->source.u.ref = txp->gref;
- gop->source.domid = vif->domid;
- gop->source.offset = txp->offset;
-
- gop->dest.domid = DOMID_SELF;
-
- gop->dest.offset = dst_offset;
- gop->dest.u.gmfn = virt_to_mfn(page_address(page));
-
- if (dst_offset + txp->size > PAGE_SIZE) {
- /* This page can only merge a portion
- * of tx request. Do not increment any
- * pointer / counter here. The txp
- * will be dealt with in future
- * rounds, eventually hitting the
- * `else` branch.
- */
- gop->len = PAGE_SIZE - dst_offset;
- txp->offset += gop->len;
- txp->size -= gop->len;
- dst_offset += gop->len; /* quit loop */
- } else {
- /* This tx request can be merged in the page */
- gop->len = txp->size;
- dst_offset += gop->len;
-
+ for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots;
+ shinfo->nr_frags++, txp++, gop++) {
index = pending_index(vif->pending_cons++);
-
pending_idx = vif->pending_ring[index];
-
- memcpy(&pending_tx_info[pending_idx].req, txp,
- sizeof(*txp));
-
- /* Poison these fields, corresponding
- * fields for head tx req will be set
- * to correct values after the loop.
- */
- vif->mmap_pages[pending_idx] = (void *)(~0UL);
- pending_tx_info[pending_idx].head =
- INVALID_PENDING_RING_IDX;
-
- if (!first) {
- first = &pending_tx_info[pending_idx];
- start_idx = index;
- head_idx = pending_idx;
- }
-
- txp++;
- slot++;
- }
-
- gop++;
- }
-
- first->req.offset = 0;
- first->req.size = dst_offset;
- first->head = start_idx;
- vif->mmap_pages[head_idx] = page;
- frag_set_pending_idx(&frags[shinfo->nr_frags], head_idx);
+ xenvif_tx_create_gop(vif, pending_idx, txp, gop);
+ frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
}
BUG_ON(shinfo->nr_frags > MAX_SKB_FRAGS);
@@ -919,11 +873,38 @@ err:
return NULL;
}
+static inline void xenvif_grant_handle_set(struct xenvif *vif,
+ u16 pending_idx,
+ grant_handle_t handle)
+{
+ if (unlikely(vif->grant_tx_handle[pending_idx] !=
+ NETBACK_INVALID_HANDLE)) {
+ netdev_err(vif->dev,
+ "Trying to overwrite active handle! pending_idx: %x\n",
+ pending_idx);
+ BUG();
+ }
+ vif->grant_tx_handle[pending_idx] = handle;
+}
+
+static inline void xenvif_grant_handle_reset(struct xenvif *vif,
+ u16 pending_idx)
+{
+ if (unlikely(vif->grant_tx_handle[pending_idx] ==
+ NETBACK_INVALID_HANDLE)) {
+ netdev_err(vif->dev,
+ "Trying to unmap invalid handle! pending_idx: %x\n",
+ pending_idx);
+ BUG();
+ }
+ vif->grant_tx_handle[pending_idx] = NETBACK_INVALID_HANDLE;
+}
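These two helpers enforce a simple invariant: each slot's handle is either NETBACK_INVALID_HANDLE or a live grant handle, and every transition asserts the expected prior state, so a double map or double unmap crashes loudly instead of silently corrupting grant state. The same sentinel-guard pattern in a standalone sketch (handle_set/handle_reset are stand-in names):

#include <assert.h>
#include <stdint.h>

#define NSLOTS 4
#define INVALID_HANDLE UINT32_MAX

static uint32_t handles[NSLOTS] = {
        INVALID_HANDLE, INVALID_HANDLE, INVALID_HANDLE, INVALID_HANDLE
};

static void handle_set(unsigned int idx, uint32_t handle)
{
        assert(handles[idx] == INVALID_HANDLE); /* catch double map */
        handles[idx] = handle;
}

static uint32_t handle_reset(unsigned int idx)
{
        uint32_t handle = handles[idx];

        assert(handle != INVALID_HANDLE);       /* catch double unmap */
        handles[idx] = INVALID_HANDLE;
        return handle;
}

int main(void)
{
        handle_set(1, 42);
        assert(handle_reset(1) == 42);
        return 0;
}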
+
static int xenvif_tx_check_gop(struct xenvif *vif,
struct sk_buff *skb,
- struct gnttab_copy **gopp)
+ struct gnttab_map_grant_ref **gopp)
{
- struct gnttab_copy *gop = *gopp;
+ struct gnttab_map_grant_ref *gop = *gopp;
u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
struct skb_shared_info *shinfo = skb_shinfo(skb);
struct pending_tx_info *tx_info;
@@ -935,6 +916,8 @@ static int xenvif_tx_check_gop(struct xenvif *vif,
err = gop->status;
if (unlikely(err))
xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
+ else
+ xenvif_grant_handle_set(vif, pending_idx, gop->handle);
/* Skip first skb fragment if it is on same page as header fragment. */
start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
@@ -948,18 +931,13 @@ static int xenvif_tx_check_gop(struct xenvif *vif,
head = tx_info->head;
/* Check error status: if okay then remember grant handle. */
- do {
newerr = (++gop)->status;
- if (newerr)
- break;
- peek = vif->pending_ring[pending_index(++head)];
- } while (!pending_tx_is_head(vif, peek));
if (likely(!newerr)) {
+ xenvif_grant_handle_set(vif, pending_idx, gop->handle);
/* Had a previous error? Invalidate this fragment. */
if (unlikely(err))
- xenvif_idx_release(vif, pending_idx,
- XEN_NETIF_RSP_OKAY);
+ xenvif_idx_unmap(vif, pending_idx);
continue;
}
@@ -972,11 +950,10 @@ static int xenvif_tx_check_gop(struct xenvif *vif,
/* First error: invalidate header and preceding fragments. */
pending_idx = XENVIF_TX_CB(skb)->pending_idx;
- xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
+ xenvif_idx_unmap(vif, pending_idx);
for (j = start; j < i; j++) {
pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
- xenvif_idx_release(vif, pending_idx,
- XEN_NETIF_RSP_OKAY);
+ xenvif_idx_unmap(vif, pending_idx);
}
/* Remember the error: invalidate all subsequent fragments. */
@@ -992,6 +969,10 @@ static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
struct skb_shared_info *shinfo = skb_shinfo(skb);
int nr_frags = shinfo->nr_frags;
int i;
+ u16 prev_pending_idx = INVALID_PENDING_IDX;
+
+ if (skb_shinfo(skb)->destructor_arg)
+ prev_pending_idx = XENVIF_TX_CB(skb)->pending_idx;
for (i = 0; i < nr_frags; i++) {
skb_frag_t *frag = shinfo->frags + i;
@@ -1001,6 +982,17 @@ static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
pending_idx = frag_get_pending_idx(frag);
+ /* If this is not the first frag, chain it to the previous */
+ if (unlikely(prev_pending_idx == INVALID_PENDING_IDX))
+ skb_shinfo(skb)->destructor_arg =
+ &vif->pending_tx_info[pending_idx].callback_struct;
+ else if (likely(pending_idx != prev_pending_idx))
+ vif->pending_tx_info[prev_pending_idx].callback_struct.ctx =
+ &(vif->pending_tx_info[pending_idx].callback_struct);
+
+ vif->pending_tx_info[pending_idx].callback_struct.ctx = NULL;
+ prev_pending_idx = pending_idx;
+
txp = &vif->pending_tx_info[pending_idx].req;
page = virt_to_page(idx_to_kaddr(vif, pending_idx));
__skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
@@ -1008,10 +1000,15 @@ static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
skb->data_len += txp->size;
skb->truesize += txp->size;
- /* Take an extra reference to offset xenvif_idx_release */
+ /* Take an extra reference to offset network stack's put_page */
get_page(vif->mmap_pages[pending_idx]);
- xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
}
+ /* FIXME: __skb_fill_page_desc sets this to true because
+  * page->pfmemalloc overlaps with "index", and "mapping" is not set.
+  * I think mapping should be set. If delivered to the local stack, it
+  * would drop this skb in sk_filter unless the socket has the right
+  * to use it.
+ */
+ skb->pfmemalloc = false;
}
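The chain construction above has three cases: the first mapped frag becomes destructor_arg, a later frag is linked through the previous slot's ctx, and the newest slot always terminates the list. That logic can be sketched in isolation; cb_model, skb_model, and chain_slot below are simplified stand-ins, not the driver's types:

#include <stdio.h>
#include <stddef.h>

struct cb_model {
        struct cb_model *ctx;   /* next slot, or NULL at the end */
        size_t desc;
};

struct skb_model {
        struct cb_model *destructor_arg;
};

static void chain_slot(struct skb_model *skb, struct cb_model **prev,
                       struct cb_model *slot)
{
        if (!*prev)                     /* first frag: becomes the head */
                skb->destructor_arg = slot;
        else if (*prev != slot)         /* later frag: link prev -> this */
                (*prev)->ctx = slot;
        slot->ctx = NULL;               /* newest slot terminates the list */
        *prev = slot;
}

int main(void)
{
        struct cb_model slots[3] = { { NULL, 0 }, { NULL, 1 }, { NULL, 2 } };
        struct skb_model skb = { NULL };
        struct cb_model *prev = NULL;
        int i;

        for (i = 0; i < 3; i++)
                chain_slot(&skb, &prev, &slots[i]);

        for (struct cb_model *c = skb.destructor_arg; c; c = c->ctx)
                printf("desc %zu\n", c->desc);
        return 0;
}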
static int xenvif_get_extras(struct xenvif *vif,
@@ -1131,7 +1128,7 @@ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
{
- struct gnttab_copy *gop = vif->tx_copy_ops, *request_gop;
+ struct gnttab_map_grant_ref *gop = vif->tx_map_ops, *request_gop;
struct sk_buff *skb;
int ret;
@@ -1238,30 +1235,10 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
}
}
- /* XXX could copy straight to head */
- page = xenvif_alloc_page(vif, pending_idx);
- if (!page) {
- kfree_skb(skb);
- xenvif_tx_err(vif, &txreq, idx);
- break;
- }
-
- gop->source.u.ref = txreq.gref;
- gop->source.domid = vif->domid;
- gop->source.offset = txreq.offset;
-
- gop->dest.u.gmfn = virt_to_mfn(page_address(page));
- gop->dest.domid = DOMID_SELF;
- gop->dest.offset = txreq.offset;
-
- gop->len = txreq.size;
- gop->flags = GNTCOPY_source_gref;
+ xenvif_tx_create_gop(vif, pending_idx, &txreq, gop);
gop++;
- memcpy(&vif->pending_tx_info[pending_idx].req,
- &txreq, sizeof(txreq));
- vif->pending_tx_info[pending_idx].head = index;
XENVIF_TX_CB(skb)->pending_idx = pending_idx;
__skb_put(skb, data_len);
@@ -1290,17 +1267,17 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
vif->tx.req_cons = idx;
- if ((gop-vif->tx_copy_ops) >= ARRAY_SIZE(vif->tx_copy_ops))
+ if ((gop-vif->tx_map_ops) >= ARRAY_SIZE(vif->tx_map_ops))
break;
}
- return gop - vif->tx_copy_ops;
+ return gop - vif->tx_map_ops;
}
static int xenvif_tx_submit(struct xenvif *vif)
{
- struct gnttab_copy *gop = vif->tx_copy_ops;
+ struct gnttab_map_grant_ref *gop = vif->tx_map_ops;
struct sk_buff *skb;
int work_done = 0;
@@ -1324,14 +1301,17 @@ static int xenvif_tx_submit(struct xenvif *vif)
memcpy(skb->data,
(void *)(idx_to_kaddr(vif, pending_idx)|txp->offset),
data_len);
+ vif->pending_tx_info[pending_idx].callback_struct.ctx = NULL;
if (data_len < txp->size) {
/* Append the packet payload as a fragment. */
txp->offset += data_len;
txp->size -= data_len;
+ skb_shinfo(skb)->destructor_arg =
+ &vif->pending_tx_info[pending_idx].callback_struct;
} else {
/* Schedule a response immediately. */
- xenvif_idx_release(vif, pending_idx,
- XEN_NETIF_RSP_OKAY);
+ skb_shinfo(skb)->destructor_arg = NULL;
+ xenvif_idx_unmap(vif, pending_idx);
}
if (txp->flags & XEN_NETTXF_csum_blank)
@@ -1353,6 +1333,9 @@ static int xenvif_tx_submit(struct xenvif *vif)
if (checksum_setup(vif, skb)) {
netdev_dbg(vif->dev,
"Can't setup checksum in net_tx_action\n");
+ /* We have to set this flag to trigger the callback */
+ if (skb_shinfo(skb)->destructor_arg)
+ skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
kfree_skb(skb);
continue;
}
@@ -1378,6 +1361,14 @@ static int xenvif_tx_submit(struct xenvif *vif)
work_done++;
+ /* Set this flag right before netif_receive_skb, otherwise
+ * someone might think this packet already left netback, and
+ * do a skb_copy_ubufs while we are still in control of the
+ * skb. E.g. the __pskb_pull_tail earlier can do such a thing.
+ */
+ if (skb_shinfo(skb)->destructor_arg)
+ skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+
netif_receive_skb(skb);
}
@@ -1386,14 +1377,111 @@ static int xenvif_tx_submit(struct xenvif *vif)
void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success)
{
- return;
+ unsigned long flags;
+ pending_ring_idx_t index;
+ struct xenvif *vif = ubuf_to_vif(ubuf);
+
+ /* This is the only place where we grab this lock, to protect callbacks
+ * from each other.
+ */
+ spin_lock_irqsave(&vif->callback_lock, flags);
+ do {
+ u16 pending_idx = ubuf->desc;
+ ubuf = (struct ubuf_info *) ubuf->ctx;
+ BUG_ON(vif->dealloc_prod - vif->dealloc_cons >=
+ MAX_PENDING_REQS);
+ index = pending_index(vif->dealloc_prod);
+ vif->dealloc_ring[index] = pending_idx;
+ /* Sync with xenvif_tx_dealloc_action:
+ * insert idx then incr producer.
+ */
+ smp_wmb();
+ vif->dealloc_prod++;
+ } while (ubuf);
+ wake_up(&vif->dealloc_wq);
+ spin_unlock_irqrestore(&vif->callback_lock, flags);
+
+ if (RING_HAS_UNCONSUMED_REQUESTS(&vif->tx) &&
+ xenvif_tx_pending_slots_available(vif)) {
+ local_bh_disable();
+ napi_schedule(&vif->napi);
+ local_bh_enable();
+ }
+}
+
+static inline void xenvif_tx_dealloc_action(struct xenvif *vif)
+{
+ struct gnttab_unmap_grant_ref *gop;
+ pending_ring_idx_t dc, dp;
+ u16 pending_idx, pending_idx_release[MAX_PENDING_REQS];
+ unsigned int i = 0;
+
+ dc = vif->dealloc_cons;
+ gop = vif->tx_unmap_ops;
+
+ /* Free up any grants we have finished using */
+ do {
+ dp = vif->dealloc_prod;
+
+ /* Ensure we see all indices enqueued by every
+ * xenvif_zerocopy_callback() call.
+ */
+ smp_rmb();
+
+ while (dc != dp) {
+ BUG_ON(gop - vif->tx_unmap_ops > MAX_PENDING_REQS);
+ pending_idx =
+ vif->dealloc_ring[pending_index(dc++)];
+
+ pending_idx_release[gop-vif->tx_unmap_ops] =
+ pending_idx;
+ vif->pages_to_unmap[gop-vif->tx_unmap_ops] =
+ vif->mmap_pages[pending_idx];
+ gnttab_set_unmap_op(gop,
+ idx_to_kaddr(vif, pending_idx),
+ GNTMAP_host_map,
+ vif->grant_tx_handle[pending_idx]);
+ /* If it was already unmapped, xenvif_grant_handle_reset will BUG */
+ xenvif_grant_handle_reset(vif, pending_idx);
+ ++gop;
+ }
+
+ } while (dp != vif->dealloc_prod);
+
+ vif->dealloc_cons = dc;
+
+ if (gop - vif->tx_unmap_ops > 0) {
+ int ret;
+ ret = gnttab_unmap_refs(vif->tx_unmap_ops,
+ NULL,
+ vif->pages_to_unmap,
+ gop - vif->tx_unmap_ops);
+ if (ret) {
+ netdev_err(vif->dev, "Unmap fail: nr_ops %x ret %d\n",
+ gop - vif->tx_unmap_ops, ret);
+ for (i = 0; i < gop - vif->tx_unmap_ops; ++i) {
+ if (gop[i].status != GNTST_okay)
+ netdev_err(vif->dev,
+ " host_addr: %llx handle: %x status: %d\n",
+ gop[i].host_addr,
+ gop[i].handle,
+ gop[i].status);
+ }
+ BUG();
+ }
+ }
+
+ for (i = 0; i < gop - vif->tx_unmap_ops; ++i)
+ xenvif_idx_release(vif, pending_idx_release[i],
+ XEN_NETIF_RSP_OKAY);
}
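The callback and the dealloc thread form a single-producer/single-consumer pair over dealloc_ring: the callback writes the slot and then bumps dealloc_prod behind an smp_wmb(), while the consumer reads dealloc_prod and then the slots behind an smp_rmb(). A userspace sketch of the same ordering contract, with C11 release/acquire atomics standing in for the kernel barriers (producer_push/consumer_pop are stand-in names):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define RING_SIZE 8
#define ring_index(i) ((i) & (RING_SIZE - 1))

static uint16_t dealloc_ring[RING_SIZE];
static atomic_uint dealloc_prod, dealloc_cons;

static void producer_push(uint16_t pending_idx)
{
        unsigned int p = atomic_load_explicit(&dealloc_prod, memory_order_relaxed);

        dealloc_ring[ring_index(p)] = pending_idx;
        /* "insert idx then incr producer": release = smp_wmb + store */
        atomic_store_explicit(&dealloc_prod, p + 1, memory_order_release);
}

static int consumer_pop(uint16_t *pending_idx)
{
        unsigned int c = atomic_load_explicit(&dealloc_cons, memory_order_relaxed);
        /* acquire = load + smp_rmb: see every slot the producer filled */
        unsigned int p = atomic_load_explicit(&dealloc_prod, memory_order_acquire);

        if (c == p)
                return 0;
        *pending_idx = dealloc_ring[ring_index(c)];
        atomic_store_explicit(&dealloc_cons, c + 1, memory_order_relaxed);
        return 1;
}

int main(void)
{
        uint16_t idx;

        producer_push(3);
        if (consumer_pop(&idx))
                printf("dealloc pending_idx %u\n", idx);
        return 0;
}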
+
/* Called after netfront has transmitted */
int xenvif_tx_action(struct xenvif *vif, int budget)
{
unsigned nr_gops;
- int work_done;
+ int work_done, ret;
if (unlikely(!tx_work_todo(vif)))
return 0;
@@ -1403,7 +1491,11 @@ int xenvif_tx_action(struct xenvif *vif, int budget)
if (nr_gops == 0)
return 0;
- gnttab_batch_copy(vif->tx_copy_ops, nr_gops);
+ ret = gnttab_map_refs(vif->tx_map_ops,
+ NULL,
+ vif->pages_to_map,
+ nr_gops);
+ BUG_ON(ret);
work_done = xenvif_tx_submit(vif);
@@ -1414,45 +1506,19 @@ static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx,
u8 status)
{
struct pending_tx_info *pending_tx_info;
- pending_ring_idx_t head;
+ pending_ring_idx_t index;
u16 peek; /* peek into next tx request */
+ unsigned long flags;
- BUG_ON(vif->mmap_pages[pending_idx] == (void *)(~0UL));
-
- /* Already complete? */
- if (vif->mmap_pages[pending_idx] == NULL)
- return;
-
- pending_tx_info = &vif->pending_tx_info[pending_idx];
-
- head = pending_tx_info->head;
-
- BUG_ON(!pending_tx_is_head(vif, head));
- BUG_ON(vif->pending_ring[pending_index(head)] != pending_idx);
-
- do {
- pending_ring_idx_t index;
- pending_ring_idx_t idx = pending_index(head);
- u16 info_idx = vif->pending_ring[idx];
-
- pending_tx_info = &vif->pending_tx_info[info_idx];
+ pending_tx_info = &vif->pending_tx_info[pending_idx];
+ spin_lock_irqsave(&vif->response_lock, flags);
make_tx_response(vif, &pending_tx_info->req, status);
-
- /* Setting any number other than
- * INVALID_PENDING_RING_IDX indicates this slot is
- * starting a new packet / ending a previous packet.
- */
- pending_tx_info->head = 0;
-
- index = pending_index(vif->pending_prod++);
- vif->pending_ring[index] = vif->pending_ring[info_idx];
-
- peek = vif->pending_ring[pending_index(++head)];
-
- } while (!pending_tx_is_head(vif, peek));
-
- put_page(vif->mmap_pages[pending_idx]);
- vif->mmap_pages[pending_idx] = NULL;
+ index = pending_index(vif->pending_prod);
+ vif->pending_ring[index] = pending_idx;
+ /* TX shouldn't use the index before we give it back here */
+ mb();
+ vif->pending_prod++;
+ spin_unlock_irqrestore(&vif->response_lock, flags);
}
@@ -1500,6 +1566,25 @@ static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
return resp;
}
+void xenvif_idx_unmap(struct xenvif *vif, u16 pending_idx)
+{
+ int ret;
+ struct gnttab_unmap_grant_ref tx_unmap_op;
+
+ gnttab_set_unmap_op(&tx_unmap_op,
+ idx_to_kaddr(vif, pending_idx),
+ GNTMAP_host_map,
+ vif->grant_tx_handle[pending_idx]);
+ /* If it was already unmapped, xenvif_grant_handle_reset will BUG */
+ xenvif_grant_handle_reset(vif, pending_idx);
+
+ ret = gnttab_unmap_refs(&tx_unmap_op, NULL,
+ &vif->mmap_pages[pending_idx], 1);
+ BUG_ON(ret);
+
+ xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
+}
+
static inline int rx_work_todo(struct xenvif *vif)
{
return !skb_queue_empty(&vif->rx_queue) &&
@@ -1516,6 +1601,11 @@ static inline int tx_work_todo(struct xenvif *vif)
return 0;
}
+static inline bool tx_dealloc_work_todo(struct xenvif *vif)
+{
+ return vif->dealloc_cons != vif->dealloc_prod;
+}
+
void xenvif_unmap_frontend_rings(struct xenvif *vif)
{
if (vif->tx.sring)
@@ -1602,6 +1692,28 @@ int xenvif_kthread_guest_rx(void *data)
return 0;
}
+int xenvif_dealloc_kthread(void *data)
+{
+ struct xenvif *vif = data;
+
+ while (!kthread_should_stop()) {
+ wait_event_interruptible(vif->dealloc_wq,
+ tx_dealloc_work_todo(vif) ||
+ kthread_should_stop());
+ if (kthread_should_stop())
+ break;
+
+ xenvif_tx_dealloc_action(vif);
+ cond_resched();
+ }
+
+ /* Unmap anything remaining */
+ if (tx_dealloc_work_todo(vif))
+ xenvif_tx_dealloc_action(vif);
+
+ return 0;
+}
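The kthread's structure, sleep until there is dealloc work or a stop request, drain, and drain once more after the stop so nothing stays queued, maps directly onto a condition-variable loop. A pthreads sketch of that shutdown-safe pattern (drain and dealloc_thread are stand-ins for xenvif_tx_dealloc_action and the kthread, not kernel APIs):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wq = PTHREAD_COND_INITIALIZER;
static int work;        /* stands in for dealloc_prod - dealloc_cons */
static bool should_stop;

static void drain(void) /* stands in for xenvif_tx_dealloc_action */
{
        while (work) {
                work--;
                printf("unmapped one slot\n");
        }
}

static void *dealloc_thread(void *arg)
{
        pthread_mutex_lock(&lock);
        while (!should_stop) {
                while (!work && !should_stop)
                        pthread_cond_wait(&wq, &lock);
                drain();
        }
        drain();        /* "Unmap anything remaining" on shutdown */
        pthread_mutex_unlock(&lock);
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, dealloc_thread, NULL);
        pthread_mutex_lock(&lock);
        work = 3;
        should_stop = true;
        pthread_cond_broadcast(&wq);
        pthread_mutex_unlock(&lock);
        pthread_join(t, NULL);
        return 0;
}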
+
static int __init netback_init(void)
{
int rc = 0;