From 0b088e003ccf316a76c51be5dec2d70b93be3be8 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Mon, 24 May 2010 20:12:41 -0700 Subject: RDS: Use page_remainder_alloc() for recv bufs Instead of splitting up a page into RDS_FRAG_SIZE chunks ourselves, ask rds_page_remainder_alloc() to do it. While it is possible PAGE_SIZE > FRAG_SIZE, on x86en it isn't, so having duplicate "carve up a page into buffers" code seems excessive. The other modification this spawns is the use of a single struct scatterlist in rds_page_frag instead of a bare page ptr. This causes verbosity to increase in some places, and decrease in others. Finally, I decided to unify the lifetimes and alloc/free of rds_page_frag and its page. This is a nice simplification in itself, but will be extra-nice once we come to adding cmason's recycling patch. Signed-off-by: Andy Grover --- net/rds/ib.h | 7 +---- net/rds/ib_recv.c | 94 ++++++++++++++++--------------------------------------- net/rds/page.c | 1 + 3 files changed, 29 insertions(+), 73 deletions(-) diff --git a/net/rds/ib.h b/net/rds/ib.h index 282ec69fe282..9bb7a7412a44 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -28,13 +28,9 @@ extern struct list_head rds_ib_devices; * try and minimize the amount of memory tied up both the device and * socket receive queues. */ -/* page offset of the final full frag that fits in the page */ -#define RDS_PAGE_LAST_OFF (((PAGE_SIZE / RDS_FRAG_SIZE) - 1) * RDS_FRAG_SIZE) struct rds_page_frag { struct list_head f_item; - struct page *f_page; - unsigned long f_offset; - dma_addr_t f_mapped; + struct scatterlist f_sg; }; struct rds_ib_incoming { @@ -107,7 +103,6 @@ struct rds_ib_connection { struct rds_header *i_recv_hdrs; u64 i_recv_hdrs_dma; struct rds_ib_recv_work *i_recvs; - struct rds_page_frag i_frag; u64 i_ack_recv; /* last ACK received */ /* sending acks */ diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 24d14615f41a..f6dbf16e0741 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -43,17 +43,11 @@ static struct kmem_cache *rds_ib_incoming_slab; static struct kmem_cache *rds_ib_frag_slab; static atomic_t rds_ib_allocation = ATOMIC_INIT(0); -static void rds_ib_frag_drop_page(struct rds_page_frag *frag) -{ - rdsdebug("frag %p page %p\n", frag, frag->f_page); - __free_page(frag->f_page); - frag->f_page = NULL; -} - +/* Free frag and attached recv buffer f_sg */ static void rds_ib_frag_free(struct rds_page_frag *frag) { - rdsdebug("frag %p page %p\n", frag, frag->f_page); - BUG_ON(frag->f_page); + rdsdebug("frag %p page %p\n", frag, sg_page(&frag->f_sg)); + __free_page(sg_page(&frag->f_sg)); kmem_cache_free(rds_ib_frag_slab, frag); } @@ -71,12 +65,8 @@ static void rds_ib_recv_unmap_page(struct rds_ib_connection *ic, { struct rds_page_frag *frag = recv->r_frag; - rdsdebug("recv %p frag %p page %p\n", recv, frag, frag->f_page); - if (frag->f_mapped) - ib_dma_unmap_page(ic->i_cm_id->device, - frag->f_mapped, - RDS_FRAG_SIZE, DMA_FROM_DEVICE); - frag->f_mapped = 0; + rdsdebug("recv %p frag %p page %p\n", recv, frag, sg_page(&frag->f_sg)); + ib_dma_unmap_sg(ic->i_cm_id->device, &frag->f_sg, 1, DMA_FROM_DEVICE); } void rds_ib_recv_init_ring(struct rds_ib_connection *ic) @@ -116,8 +106,6 @@ static void rds_ib_recv_clear_one(struct rds_ib_connection *ic, } if (recv->r_frag) { rds_ib_recv_unmap_page(ic, recv); - if (recv->r_frag->f_page) - rds_ib_frag_drop_page(recv->r_frag); rds_ib_frag_free(recv->r_frag); recv->r_frag = NULL; } @@ -129,16 +117,12 @@ void rds_ib_recv_clear_ring(struct rds_ib_connection *ic) for (i = 0; i < ic->i_recv_ring.w_nr; i++) rds_ib_recv_clear_one(ic, &ic->i_recvs[i]); - - if (ic->i_frag.f_page) - rds_ib_frag_drop_page(&ic->i_frag); } static int rds_ib_recv_refill_one(struct rds_connection *conn, struct rds_ib_recv_work *recv) { struct rds_ib_connection *ic = conn->c_transport_data; - dma_addr_t dma_addr; struct ib_sge *sge; int ret = -ENOMEM; @@ -161,50 +145,27 @@ static int rds_ib_recv_refill_one(struct rds_connection *conn, if (!recv->r_frag) goto out; INIT_LIST_HEAD(&recv->r_frag->f_item); - recv->r_frag->f_page = NULL; - } - - if (!ic->i_frag.f_page) { - ic->i_frag.f_page = alloc_page(GFP_NOWAIT); - if (!ic->i_frag.f_page) + sg_init_table(&recv->r_frag->f_sg, 1); + ret = rds_page_remainder_alloc(&recv->r_frag->f_sg, + RDS_FRAG_SIZE, GFP_NOWAIT); + if (ret) { + kmem_cache_free(rds_ib_frag_slab, recv->r_frag); + recv->r_frag = NULL; goto out; - ic->i_frag.f_offset = 0; + } } - dma_addr = ib_dma_map_page(ic->i_cm_id->device, - ic->i_frag.f_page, - ic->i_frag.f_offset, - RDS_FRAG_SIZE, - DMA_FROM_DEVICE); - if (ib_dma_mapping_error(ic->i_cm_id->device, dma_addr)) - goto out; - - /* - * Once we get the RDS_PAGE_LAST_OFF frag then rds_ib_frag_unmap() - * must be called on this recv. This happens as completions hit - * in order or on connection shutdown. - */ - recv->r_frag->f_page = ic->i_frag.f_page; - recv->r_frag->f_offset = ic->i_frag.f_offset; - recv->r_frag->f_mapped = dma_addr; + ret = ib_dma_map_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, + 1, DMA_FROM_DEVICE); + WARN_ON(ret != 1); sge = &recv->r_sge[0]; sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header); sge->length = sizeof(struct rds_header); sge = &recv->r_sge[1]; - sge->addr = dma_addr; - sge->length = RDS_FRAG_SIZE; - - get_page(recv->r_frag->f_page); - - if (ic->i_frag.f_offset < RDS_PAGE_LAST_OFF) { - ic->i_frag.f_offset += RDS_FRAG_SIZE; - } else { - put_page(ic->i_frag.f_page); - ic->i_frag.f_page = NULL; - ic->i_frag.f_offset = 0; - } + sge->addr = sg_dma_address(&recv->r_frag->f_sg); + sge->length = sg_dma_len(&recv->r_frag->f_sg); ret = 0; out: @@ -247,8 +208,8 @@ int rds_ib_recv_refill(struct rds_connection *conn, int prefill) /* XXX when can this fail? */ ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr); rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv, - recv->r_ibinc, recv->r_frag->f_page, - (long) recv->r_frag->f_mapped, ret); + recv->r_ibinc, sg_page(&recv->r_frag->f_sg), + (long) sg_dma_address(&recv->r_frag->f_sg), ret); if (ret) { rds_ib_conn_error(conn, "recv post on " "%pI4 returned %d, disconnecting and " @@ -281,7 +242,6 @@ static void rds_ib_inc_purge(struct rds_incoming *inc) list_for_each_entry_safe(frag, pos, &ibinc->ii_frags, f_item) { list_del_init(&frag->f_item); - rds_ib_frag_drop_page(frag); rds_ib_frag_free(frag); } } @@ -333,13 +293,13 @@ int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov, to_copy = min_t(unsigned long, to_copy, len - copied); rdsdebug("%lu bytes to user [%p, %zu] + %lu from frag " - "[%p, %lu] + %lu\n", + "[%p, %u] + %lu\n", to_copy, iov->iov_base, iov->iov_len, iov_off, - frag->f_page, frag->f_offset, frag_off); + sg_page(&frag->f_sg), frag->f_sg.offset, frag_off); /* XXX needs + offset for multiple recvs per page */ - ret = rds_page_copy_to_user(frag->f_page, - frag->f_offset + frag_off, + ret = rds_page_copy_to_user(sg_page(&frag->f_sg), + frag->f_sg.offset + frag_off, iov->iov_base + iov_off, to_copy); if (ret) { @@ -595,7 +555,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn, to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off); BUG_ON(to_copy & 7); /* Must be 64bit aligned. */ - addr = kmap_atomic(frag->f_page, KM_SOFTIRQ0); + addr = kmap_atomic(sg_page(&frag->f_sg), KM_SOFTIRQ0); src = addr + frag_off; dst = (void *)map->m_page_addrs[map_page] + map_off; @@ -698,12 +658,12 @@ static void rds_ib_process_recv(struct rds_connection *conn, * the inc is freed. We don't go that route, so we have to drop the * page ref ourselves. We can't just leave the page on the recv * because that confuses the dma mapping of pages and each recv's use - * of a partial page. We can leave the frag, though, it will be - * reused. + * of a partial page. * * FIXME: Fold this into the code path below. */ - rds_ib_frag_drop_page(recv->r_frag); + rds_ib_frag_free(recv->r_frag); + recv->r_frag = NULL; return; } diff --git a/net/rds/page.c b/net/rds/page.c index e5b2527ae257..5e44f5ae7898 100644 --- a/net/rds/page.c +++ b/net/rds/page.c @@ -186,6 +186,7 @@ out: ret ? 0 : scat->length); return ret; } +EXPORT_SYMBOL_GPL(rds_page_remainder_alloc); static int rds_page_remainder_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) -- cgit v1.2.3-59-g8ed1b