Diffstat (limited to 'sys/kern/subr_pool.c')
-rw-r--r-- | sys/kern/subr_pool.c | 224
1 file changed, 102 insertions, 122 deletions
diff --git a/sys/kern/subr_pool.c b/sys/kern/subr_pool.c
index 1f9e7366b43..2ab398290ab 100644
--- a/sys/kern/subr_pool.c
+++ b/sys/kern/subr_pool.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: subr_pool.c,v 1.148 2014/08/27 00:22:26 dlg Exp $	*/
+/*	$OpenBSD: subr_pool.c,v 1.149 2014/09/04 00:36:00 dlg Exp $	*/
 /*	$NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $	*/
 
 /*-
@@ -84,7 +84,6 @@ struct pool_item_header {
 	int			ph_nmissing;	/* # of chunks in use */
 	caddr_t			ph_page;	/* this page's address */
 	caddr_t			ph_colored;	/* page's colored address */
-	int			ph_pagesize;
 	int			ph_magic;
 };
 
@@ -119,21 +118,35 @@ void	*pool_allocator_alloc(struct pool *, int, int *);
 void	pool_allocator_free(struct pool *, void *);
 
 /*
- * XXX - quick hack. For pools with large items we want to use a special
- *       allocator. For now, instead of having the allocator figure out
- *       the allocation size from the pool (which can be done trivially
- *       with round_page(pr_itemsperpage * pr_size)) which would require
- *       lots of changes everywhere, we just create allocators for each
- *       size. We limit those to 128 pages.
+ * The default pool allocator.
  */
-#define POOL_LARGE_MAXPAGES 128
-struct pool_allocator pool_allocator_large[POOL_LARGE_MAXPAGES];
-struct pool_allocator pool_allocator_large_ni[POOL_LARGE_MAXPAGES];
+void	*pool_page_alloc(struct pool *, int, int *);
+void	pool_page_free(struct pool *, void *);
+
+/*
+ * safe for interrupts, name preserved for compat this is the default
+ * allocator
+ */
+struct pool_allocator pool_allocator_nointr = {
+	pool_page_alloc,
+	pool_page_free
+};
+
 void	*pool_large_alloc(struct pool *, int, int *);
 void	pool_large_free(struct pool *, void *);
+
+struct pool_allocator pool_allocator_large = {
+	pool_large_alloc,
+	pool_large_free
+};
+
 void	*pool_large_alloc_ni(struct pool *, int, int *);
 void	pool_large_free_ni(struct pool *, void *);
+struct pool_allocator pool_allocator_large_ni = {
+	pool_large_alloc_ni,
+	pool_large_free_ni
+};
 
 #ifdef DDB
 void	 pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
@@ -173,7 +186,7 @@ pr_find_pagehead(struct pool *pp, void *v)
 	if ((pp->pr_roflags & PR_PHINPAGE) != 0) {
 		caddr_t page;
 
-		page = (caddr_t)((vaddr_t)v & pp->pr_alloc->pa_pagemask);
+		page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);
 
 		return ((struct pool_item_header *)(page + pp->pr_phoffset));
 	}
@@ -186,7 +199,7 @@ pr_find_pagehead(struct pool *pp, void *v)
 	}
 
 	KASSERT(ph->ph_page <= (caddr_t)v);
-	if (ph->ph_page + ph->ph_pagesize <= (caddr_t)v) {
+	if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v) {
 		panic("pr_find_pagehead: %s: incorrect page",
 		    pp->pr_wchan);
 	}
@@ -246,7 +259,8 @@ void
 pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
     const char *wchan, struct pool_allocator *palloc)
 {
-	int off, slack;
+	int off = 0, slack;
+	unsigned int pgsize = PAGE_SIZE, items;
 #ifdef DIAGNOSTIC
 	struct pool *iter;
 	KASSERT(ioff == 0);
@@ -256,48 +270,6 @@ pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
 	if ((flags & PR_DEBUG) && align != 0)
 		flags &= ~PR_DEBUG;
 #endif
-	/*
-	 * Check arguments and construct default values.
-	 */
-	if (palloc == NULL) {
-		if (size > PAGE_SIZE) {
-			int psize;
-
-			/*
-			 * XXX - should take align into account as well.
-			 */
-			if (size == round_page(size))
-				psize = size / PAGE_SIZE;
-			else
-				psize = PAGE_SIZE / roundup(size % PAGE_SIZE,
-				    1024);
-			if (psize > POOL_LARGE_MAXPAGES)
-				psize = POOL_LARGE_MAXPAGES;
-			if (flags & PR_WAITOK)
-				palloc = &pool_allocator_large_ni[psize-1];
-			else
-				palloc = &pool_allocator_large[psize-1];
-			if (palloc->pa_pagesz == 0) {
-				palloc->pa_pagesz = psize * PAGE_SIZE;
-				if (flags & PR_WAITOK) {
-					palloc->pa_alloc = pool_large_alloc_ni;
-					palloc->pa_free = pool_large_free_ni;
-				} else {
-					palloc->pa_alloc = pool_large_alloc;
-					palloc->pa_free = pool_large_free;
-				}
-			}
-		} else {
-			palloc = &pool_allocator_nointr;
-		}
-	}
-	if (palloc->pa_pagesz == 0) {
-		palloc->pa_pagesz = PAGE_SIZE;
-	}
-	if (palloc->pa_pagemask == 0) {
-		palloc->pa_pagemask = ~(palloc->pa_pagesz - 1);
-		palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1;
-	}
 
 	if (align == 0)
 		align = ALIGN(1);
@@ -306,15 +278,43 @@ pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
 		size = sizeof(struct pool_item);
 
 	size = roundup(size, align);
-#ifdef DIAGNOSTIC
-	if (size > palloc->pa_pagesz)
-		panic("pool_init: pool item size (%lu) too large",
-		    (u_long)size);
-#endif
+
+	if (palloc == NULL) {
+		while (size > pgsize)
+			pgsize <<= 1;
+
+		if (pgsize > PAGE_SIZE) {
+			palloc = ISSET(flags, PR_WAITOK) ?
+			    &pool_allocator_large_ni : &pool_allocator_large;
+		} else
+			palloc = &pool_allocator_nointr;
+	} else
+		pgsize = palloc->pa_pagesz ? palloc->pa_pagesz : PAGE_SIZE;
+
+	items = pgsize / size;
+
+	/*
+	 * Decide whether to put the page header off page to avoid
+	 * wasting too large a part of the page. Off-page page headers
+	 * go into an RB tree, so we can match a returned item with
+	 * its header based on the page address.
+	 */
+	if (pgsize - (size * items) > sizeof(struct pool_item_header)) {
+		flags |= PR_PHINPAGE;
+		off = pgsize - sizeof(struct pool_item_header);
+	} else if (sizeof(struct pool_item_header) * 2 >= size) {
+		flags |= PR_PHINPAGE;
+		off = pgsize - sizeof(struct pool_item_header);
+		items = off / size;
+	} else
+		off = pgsize;
+
+	KASSERT(items > 0);
 
 	/*
 	 * Initialize the pool structure.
 	 */
+	memset(pp, 0, sizeof(*pp));
 	LIST_INIT(&pp->pr_emptypages);
 	LIST_INIT(&pp->pr_fullpages);
 	LIST_INIT(&pp->pr_partpages);
@@ -326,6 +326,10 @@ pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
 	pp->pr_roflags = flags;
 	pp->pr_flags = 0;
 	pp->pr_size = size;
+	pp->pr_pgsize = pgsize;
+	pp->pr_pgmask = ~0UL ^ (pgsize - 1);
+	pp->pr_phoffset = off;
+	pp->pr_itemsperpage = items;
 	pp->pr_align = align;
 	pp->pr_wchan = wchan;
 	pp->pr_alloc = palloc;
@@ -337,28 +341,7 @@ pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
 	pp->pr_hardlimit_ratecap.tv_usec = 0;
 	pp->pr_hardlimit_warning_last.tv_sec = 0;
 	pp->pr_hardlimit_warning_last.tv_usec = 0;
-
-	/*
-	 * Decide whether to put the page header off page to avoid
-	 * wasting too large a part of the page. Off-page page headers
-	 * go into an RB tree, so we can match a returned item with
-	 * its header based on the page address.
-	 * We use 1/16 of the page size as the threshold (XXX: tune)
-	 */
-	if (pp->pr_size < palloc->pa_pagesz/16 && pp->pr_size < PAGE_SIZE) {
-		/* Use the end of the page for the page header */
-		pp->pr_roflags |= PR_PHINPAGE;
-		pp->pr_phoffset = off = palloc->pa_pagesz -
-		    ALIGN(sizeof(struct pool_item_header));
-	} else {
-		/* The page header will be taken from our page header pool */
-		pp->pr_phoffset = 0;
-		off = palloc->pa_pagesz;
-		RB_INIT(&pp->pr_phtree);
-	}
-
-	pp->pr_itemsperpage = off / pp->pr_size;
-	KASSERT(pp->pr_itemsperpage != 0);
+	RB_INIT(&pp->pr_phtree);
 
 	/*
 	 * Use the slack between the chunks and the page header
@@ -882,7 +865,6 @@ pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph)
 	LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
 	XSIMPLEQ_INIT(&ph->ph_itemlist);
 	ph->ph_page = storage;
-	ph->ph_pagesize = pp->pr_alloc->pa_pagesz;
 	ph->ph_nmissing = 0;
 	if ((pp->pr_roflags & PR_PHINPAGE) == 0)
 		RB_INSERT(phtree, &pp->pr_phtree, ph);
@@ -1282,7 +1264,7 @@ pool_chk_page(struct pool *pp, struct pool_item_header *ph, int expected)
 	int n;
 	const char *label = pp->pr_wchan;
 
-	page = (caddr_t)((u_long)ph & pp->pr_alloc->pa_pagemask);
+	page = (caddr_t)((u_long)ph & pp->pr_pgmask);
 	if (page != ph->ph_page &&
 	    (pp->pr_roflags & PR_PHINPAGE) != 0) {
 		printf("%s: ", label);
@@ -1319,8 +1301,7 @@ pool_chk_page(struct pool *pp, struct pool_item_header *ph, int expected)
 		}
 	}
 #endif /* DIAGNOSTIC */
-		page =
-		    (caddr_t)((u_long)pi & pp->pr_alloc->pa_pagemask);
+		page = (caddr_t)((u_long)pi & pp->pr_pgmask);
 		if (page == ph->ph_page)
 			continue;
 
@@ -1454,7 +1435,7 @@ sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
 
 		mtx_enter(&pp->pr_mtx);
 		pi.pr_size = pp->pr_size;
-		pi.pr_pgsize = pp->pr_alloc->pa_pagesz;
+		pi.pr_pgsize = pp->pr_pgsize;
 		pi.pr_itemsperpage = pp->pr_itemsperpage;
 		pi.pr_npages = pp->pr_npages;
 		pi.pr_minpages = pp->pr_minpages;
@@ -1483,33 +1464,6 @@ done:
 
 /*
  * Pool backend allocators.
- *
- * Each pool has a backend allocator that handles allocation, deallocation
- */
-void	*pool_page_alloc(struct pool *, int, int *);
-void	pool_page_free(struct pool *, void *);
-
-/*
- * safe for interrupts, name preserved for compat this is the default
- * allocator
- */
-struct pool_allocator pool_allocator_nointr = {
-	pool_page_alloc, pool_page_free, 0,
-};
-
-/*
- * XXX - we have at least three different resources for the same allocation
- * and each resource can be depleted. First we have the ready elements in
- * the pool. Then we have the resource (typically a vm_map) for this
- * allocator, then we have physical memory. Waiting for any of these can
- * be unnecessary when any other is freed, but the kernel doesn't support
- * sleeping on multiple addresses, so we have to fake. The caller sleeps on
- * the pool (so that we can be awakened when an item is returned to the pool),
- * but we set PA_WANT on the allocator. When a page is returned to
- * the allocator and PA_WANT is set pool_allocator_free will wakeup all
- * sleeping pools belonging to this allocator. (XXX - thundering herd).
- * We also wake up the allocator in case someone without a pool (malloc)
- * is sleeping waiting for this allocator.
  */
 
 void *
@@ -1524,6 +1478,16 @@ pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
 	if (waitok)
 		mtx_enter(&pp->pr_mtx);
 
+#ifdef DIAGNOSTIC
+	if (v != NULL && ISSET(pp->pr_roflags, PR_PHINPAGE)) {
+		vaddr_t addr = (vaddr_t)v;
+		if ((addr & pp->pr_pgmask) != addr) {
+			panic("%s: %s page address %p isnt aligned to %u",
+			    __func__, pp->pr_wchan, v, pp->pr_pgsize);
+		}
+	}
+#endif
+
 	return (v);
 }
 
@@ -1543,28 +1507,31 @@ pool_page_alloc(struct pool *pp, int flags, int *slowdown)
 	kd.kd_waitok = (flags & PR_WAITOK);
 	kd.kd_slowdown = slowdown;
 
-	return (km_alloc(PAGE_SIZE, &kv_page, pp->pr_crange, &kd));
+	return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
 }
 
 void
 pool_page_free(struct pool *pp, void *v)
 {
-	km_free(v, PAGE_SIZE, &kv_page, pp->pr_crange);
+	km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
 }
 
 void *
 pool_large_alloc(struct pool *pp, int flags, int *slowdown)
 {
+	struct kmem_va_mode kv = kv_intrsafe;
 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
 	void *v;
 	int s;
 
+	if (ISSET(pp->pr_roflags, PR_PHINPAGE))
+		kv.kv_align = pp->pr_pgsize;
+
 	kd.kd_waitok = (flags & PR_WAITOK);
 	kd.kd_slowdown = slowdown;
 
 	s = splvm();
-	v = km_alloc(pp->pr_alloc->pa_pagesz, &kv_intrsafe, pp->pr_crange,
-	    &kd);
+	v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
 	splx(s);
 
 	return (v);
@@ -1573,26 +1540,39 @@ pool_large_alloc(struct pool *pp, int flags, int *slowdown)
 void
 pool_large_free(struct pool *pp, void *v)
 {
+	struct kmem_va_mode kv = kv_intrsafe;
 	int s;
 
+	if (ISSET(pp->pr_roflags, PR_PHINPAGE))
+		kv.kv_align = pp->pr_pgsize;
+
 	s = splvm();
-	km_free(v, pp->pr_alloc->pa_pagesz, &kv_intrsafe, pp->pr_crange);
+	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
 	splx(s);
 }
 
 void *
 pool_large_alloc_ni(struct pool *pp, int flags, int *slowdown)
 {
+	struct kmem_va_mode kv = kv_any;
 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
 
+	if (ISSET(pp->pr_roflags, PR_PHINPAGE))
+		kv.kv_align = pp->pr_pgsize;
+
 	kd.kd_waitok = (flags & PR_WAITOK);
 	kd.kd_slowdown = slowdown;
 
-	return (km_alloc(pp->pr_alloc->pa_pagesz, &kv_any, pp->pr_crange, &kd));
+	return (km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd));
 }
 
 void
 pool_large_free_ni(struct pool *pp, void *v)
 {
-	km_free(v, pp->pr_alloc->pa_pagesz, &kv_any, pp->pr_crange);
+	struct kmem_va_mode kv = kv_any;
+
+	if (ISSET(pp->pr_roflags, PR_PHINPAGE))
+		kv.kv_align = pp->pr_pgsize;
+
+	km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
 }
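
For readers following the change: the new pool_init() grows the pool page size to the next power of two that holds one item, derives the items-per-page count by plain division, and places the page header according to the slack left over; pr_find_pagehead() can then mask an item address with pr_pgmask to locate the in-page header. The short program below is a minimal standalone sketch of that arithmetic under assumed values (the page size, header size, item size and address are made-up stand-ins), not the kernel implementation itself.

/*
 * Standalone sketch (not kernel code) of the sizing logic added to
 * pool_init() and the page-mask lookup used by pr_find_pagehead().
 * The constants and the item address below are illustrative only.
 */
#include <stdio.h>
#include <stdint.h>

#define SKETCH_PAGE_SIZE	4096u
#define SKETCH_PH_SIZE		64u	/* stand-in for sizeof(struct pool_item_header) */

int
main(void)
{
	unsigned int size = 1200;	/* item size, already rounded up to the alignment */
	unsigned int pgsize = SKETCH_PAGE_SIZE;
	unsigned int items, off;
	int phinpage;

	/* Grow the pool page to the next power of two that fits one item. */
	while (size > pgsize)
		pgsize <<= 1;

	items = pgsize / size;

	/*
	 * Keep the header inside the page when the natural slack already
	 * holds it, or when the header is small relative to the item size
	 * (then one item slot is given up for it); otherwise the header
	 * lives off page in the RB tree.
	 */
	if (pgsize - (size * items) > SKETCH_PH_SIZE) {
		phinpage = 1;
		off = pgsize - SKETCH_PH_SIZE;
	} else if (SKETCH_PH_SIZE * 2 >= size) {
		phinpage = 1;
		off = pgsize - SKETCH_PH_SIZE;
		items = off / size;
	} else {
		phinpage = 0;
		off = pgsize;
	}

	printf("pgsize %u: %u items/page, header %s, offset %u\n",
	    pgsize, items, phinpage ? "in page" : "off page", off);

	/*
	 * With in-page headers the page is naturally aligned, so masking
	 * an item address with pr_pgmask yields its page, and the header
	 * sits at page + pr_phoffset.
	 */
	uintptr_t pgmask = ~(uintptr_t)0 ^ (pgsize - 1);
	uintptr_t item = 0x20006ab0u;	/* hypothetical item address */
	printf("item %#lx -> page %#lx, header at %#lx\n",
	    (unsigned long)item, (unsigned long)(item & pgmask),
	    (unsigned long)((item & pgmask) + off));

	return (0);
}

The masking only works because the allocators cooperate: as the diff shows, the large allocators set kv.kv_align to pp->pr_pgsize whenever PR_PHINPAGE is set, and pool_allocator_alloc() gains a DIAGNOSTIC check that the returned address really is aligned to pr_pgsize, so even multi-page pool pages start on a pr_pgsize boundary.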