Diffstat (limited to 'sys/kern/subr_pool.c')
-rw-r--r--  sys/kern/subr_pool.c  224
1 files changed, 102 insertions, 122 deletions
diff --git a/sys/kern/subr_pool.c b/sys/kern/subr_pool.c
index 1f9e7366b43..2ab398290ab 100644
--- a/sys/kern/subr_pool.c
+++ b/sys/kern/subr_pool.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: subr_pool.c,v 1.148 2014/08/27 00:22:26 dlg Exp $ */
+/* $OpenBSD: subr_pool.c,v 1.149 2014/09/04 00:36:00 dlg Exp $ */
/* $NetBSD: subr_pool.c,v 1.61 2001/09/26 07:14:56 chs Exp $ */
/*-
@@ -84,7 +84,6 @@ struct pool_item_header {
int ph_nmissing; /* # of chunks in use */
caddr_t ph_page; /* this page's address */
caddr_t ph_colored; /* page's colored address */
- int ph_pagesize;
int ph_magic;
};
@@ -119,21 +118,35 @@ void *pool_allocator_alloc(struct pool *, int, int *);
void pool_allocator_free(struct pool *, void *);
/*
- * XXX - quick hack. For pools with large items we want to use a special
- * allocator. For now, instead of having the allocator figure out
- * the allocation size from the pool (which can be done trivially
- * with round_page(pr_itemsperpage * pr_size)) which would require
- * lots of changes everywhere, we just create allocators for each
- * size. We limit those to 128 pages.
+ * The default pool allocator.
*/
-#define POOL_LARGE_MAXPAGES 128
-struct pool_allocator pool_allocator_large[POOL_LARGE_MAXPAGES];
-struct pool_allocator pool_allocator_large_ni[POOL_LARGE_MAXPAGES];
+void *pool_page_alloc(struct pool *, int, int *);
+void pool_page_free(struct pool *, void *);
+
+/*
+ * Safe for interrupts; the name is preserved for compatibility. This
+ * is the default allocator.
+ */
+struct pool_allocator pool_allocator_nointr = {
+ pool_page_alloc,
+ pool_page_free
+};
+
void *pool_large_alloc(struct pool *, int, int *);
void pool_large_free(struct pool *, void *);
+
+struct pool_allocator pool_allocator_large = {
+ pool_large_alloc,
+ pool_large_free
+};
+
void *pool_large_alloc_ni(struct pool *, int, int *);
void pool_large_free_ni(struct pool *, void *);
+struct pool_allocator pool_allocator_large_ni = {
+ pool_large_alloc_ni,
+ pool_large_free_ni
+};
#ifdef DDB
void pool_print_pagelist(struct pool_pagelist *, int (*)(const char *, ...)
@@ -173,7 +186,7 @@ pr_find_pagehead(struct pool *pp, void *v)
if ((pp->pr_roflags & PR_PHINPAGE) != 0) {
caddr_t page;
- page = (caddr_t)((vaddr_t)v & pp->pr_alloc->pa_pagemask);
+ page = (caddr_t)((vaddr_t)v & pp->pr_pgmask);
return ((struct pool_item_header *)(page + pp->pr_phoffset));
}
@@ -186,7 +199,7 @@ pr_find_pagehead(struct pool *pp, void *v)
}
KASSERT(ph->ph_page <= (caddr_t)v);
- if (ph->ph_page + ph->ph_pagesize <= (caddr_t)v) {
+ if (ph->ph_page + pp->pr_pgsize <= (caddr_t)v) {
panic("pr_find_pagehead: %s: incorrect page",
pp->pr_wchan);
}
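With the page size and mask now stored on the pool itself, pr_find_pagehead() maps an item pointer back to its page header with plain mask arithmetic. The following is a minimal standalone sketch of that lookup, assuming a hypothetical 4096-byte pr_pgsize and a 48-byte header; the struct is a simplified stand-in, not the real struct pool:

#include <stdint.h>
#include <stdio.h>

/* simplified stand-in for the fields pr_find_pagehead() uses */
struct toy_pool {
	unsigned long	pr_pgmask;	/* ~(pr_pgsize - 1) */
	unsigned int	pr_pgsize;
	unsigned int	pr_phoffset;	/* in-page header lives here */
};

int
main(void)
{
	struct toy_pool pp = {
		.pr_pgsize = 4096,
		.pr_pgmask = ~0UL ^ (4096 - 1),
		.pr_phoffset = 4096 - 48,	/* assuming a 48-byte header */
	};
	uintptr_t item = 0x12345678;	/* an item somewhere inside a page */
	uintptr_t page = item & pp.pr_pgmask;

	/* prints: page 0x12345000, header 0x12345fd0 */
	printf("page %#lx, header %#lx\n", (unsigned long)page,
	    (unsigned long)(page + pp.pr_phoffset));
	return (0);
}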
@@ -246,7 +259,8 @@ void
pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
const char *wchan, struct pool_allocator *palloc)
{
- int off, slack;
+ int off = 0, slack;
+ unsigned int pgsize = PAGE_SIZE, items;
#ifdef DIAGNOSTIC
struct pool *iter;
KASSERT(ioff == 0);
@@ -256,48 +270,6 @@ pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
if ((flags & PR_DEBUG) && align != 0)
flags &= ~PR_DEBUG;
#endif
- /*
- * Check arguments and construct default values.
- */
- if (palloc == NULL) {
- if (size > PAGE_SIZE) {
- int psize;
-
- /*
- * XXX - should take align into account as well.
- */
- if (size == round_page(size))
- psize = size / PAGE_SIZE;
- else
- psize = PAGE_SIZE / roundup(size % PAGE_SIZE,
- 1024);
- if (psize > POOL_LARGE_MAXPAGES)
- psize = POOL_LARGE_MAXPAGES;
- if (flags & PR_WAITOK)
- palloc = &pool_allocator_large_ni[psize-1];
- else
- palloc = &pool_allocator_large[psize-1];
- if (palloc->pa_pagesz == 0) {
- palloc->pa_pagesz = psize * PAGE_SIZE;
- if (flags & PR_WAITOK) {
- palloc->pa_alloc = pool_large_alloc_ni;
- palloc->pa_free = pool_large_free_ni;
- } else {
- palloc->pa_alloc = pool_large_alloc;
- palloc->pa_free = pool_large_free;
- }
- }
- } else {
- palloc = &pool_allocator_nointr;
- }
- }
- if (palloc->pa_pagesz == 0) {
- palloc->pa_pagesz = PAGE_SIZE;
- }
- if (palloc->pa_pagemask == 0) {
- palloc->pa_pagemask = ~(palloc->pa_pagesz - 1);
- palloc->pa_pageshift = ffs(palloc->pa_pagesz) - 1;
- }
if (align == 0)
align = ALIGN(1);
@@ -306,15 +278,43 @@ pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
size = sizeof(struct pool_item);
size = roundup(size, align);
-#ifdef DIAGNOSTIC
- if (size > palloc->pa_pagesz)
- panic("pool_init: pool item size (%lu) too large",
- (u_long)size);
-#endif
+
+ if (palloc == NULL) {
+ while (size > pgsize)
+ pgsize <<= 1;
+
+ if (pgsize > PAGE_SIZE) {
+ palloc = ISSET(flags, PR_WAITOK) ?
+ &pool_allocator_large_ni : &pool_allocator_large;
+ } else
+ palloc = &pool_allocator_nointr;
+ } else
+ pgsize = palloc->pa_pagesz ? palloc->pa_pagesz : PAGE_SIZE;
+
+ items = pgsize / size;
+
+ /*
+ * Decide whether to put the page header off page to avoid
+ * wasting too large a part of the page. Off-page page headers
+ * go into an RB tree, so we can match a returned item with
+ * its header based on the page address.
+ */
+ if (pgsize - (size * items) > sizeof(struct pool_item_header)) {
+ flags |= PR_PHINPAGE;
+ off = pgsize - sizeof(struct pool_item_header);
+ } else if (sizeof(struct pool_item_header) * 2 >= size) {
+ flags |= PR_PHINPAGE;
+ off = pgsize - sizeof(struct pool_item_header);
+ items = off / size;
+ } else
+ off = pgsize;
+
+ KASSERT(items > 0);
/*
* Initialize the pool structure.
*/
+ memset(pp, 0, sizeof(*pp));
LIST_INIT(&pp->pr_emptypages);
LIST_INIT(&pp->pr_fullpages);
LIST_INIT(&pp->pr_partpages);
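The new sizing code above is easier to follow with a concrete case. Below is a minimal sketch of the same decisions, assuming PAGE_SIZE is 4096, a hypothetical 48-byte pool_item_header, and an already rounded-up item size of 5000 bytes; it mirrors the hunk for illustration only and is not the kernel code itself:

#include <stdio.h>

#define FAKE_PAGE_SIZE	4096	/* assumed PAGE_SIZE */
#define FAKE_PH_SIZE	48	/* assumed sizeof(struct pool_item_header) */

int
main(void)
{
	unsigned int size = 5000;	/* hypothetical item size, post-roundup */
	unsigned int pgsize = FAKE_PAGE_SIZE;
	unsigned int items, off;
	int phinpage = 0;

	/* grow pgsize to the smallest power of two that fits one item */
	while (size > pgsize)
		pgsize <<= 1;		/* 4096 -> 8192 */

	items = pgsize / size;		/* 8192 / 5000 = 1 */

	if (pgsize - (size * items) > FAKE_PH_SIZE) {
		/* enough slack: keep the page header inside the page */
		phinpage = 1;
		off = pgsize - FAKE_PH_SIZE;	/* 8144 */
	} else if (FAKE_PH_SIZE * 2 >= size) {
		/* tiny items: give up one header's worth of space */
		phinpage = 1;
		off = pgsize - FAKE_PH_SIZE;
		items = off / size;
	} else {
		/* no slack and large items: header goes off page (RB tree) */
		off = pgsize;
	}

	/* prints: pgsize 8192 items 1 off 8144 phinpage 1 */
	printf("pgsize %u items %u off %u phinpage %d\n",
	    pgsize, items, off, phinpage);
	return (0);
}

Because pgsize ends up above PAGE_SIZE in this example, pool_init() would also pick pool_allocator_large (or pool_allocator_large_ni under PR_WAITOK) rather than the default pool_allocator_nointr.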
@@ -326,6 +326,10 @@ pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
pp->pr_roflags = flags;
pp->pr_flags = 0;
pp->pr_size = size;
+ pp->pr_pgsize = pgsize;
+ pp->pr_pgmask = ~0UL ^ (pgsize - 1);
+ pp->pr_phoffset = off;
+ pp->pr_itemsperpage = items;
pp->pr_align = align;
pp->pr_wchan = wchan;
pp->pr_alloc = palloc;
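Note that for a power-of-two pgsize, ~0UL ^ (pgsize - 1) is simply ~(pgsize - 1): with a 4096-byte page the mask is 0xfffffffffffff000 on a 64-bit platform (0xfffff000 on 32-bit), so ANDing any address inside the page with pr_pgmask yields the page's base address, which is what pr_find_pagehead() and pool_chk_page() rely on.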
@@ -337,28 +341,7 @@ pool_init(struct pool *pp, size_t size, u_int align, u_int ioff, int flags,
pp->pr_hardlimit_ratecap.tv_usec = 0;
pp->pr_hardlimit_warning_last.tv_sec = 0;
pp->pr_hardlimit_warning_last.tv_usec = 0;
-
- /*
- * Decide whether to put the page header off page to avoid
- * wasting too large a part of the page. Off-page page headers
- * go into an RB tree, so we can match a returned item with
- * its header based on the page address.
- * We use 1/16 of the page size as the threshold (XXX: tune)
- */
- if (pp->pr_size < palloc->pa_pagesz/16 && pp->pr_size < PAGE_SIZE) {
- /* Use the end of the page for the page header */
- pp->pr_roflags |= PR_PHINPAGE;
- pp->pr_phoffset = off = palloc->pa_pagesz -
- ALIGN(sizeof(struct pool_item_header));
- } else {
- /* The page header will be taken from our page header pool */
- pp->pr_phoffset = 0;
- off = palloc->pa_pagesz;
- RB_INIT(&pp->pr_phtree);
- }
-
- pp->pr_itemsperpage = off / pp->pr_size;
- KASSERT(pp->pr_itemsperpage != 0);
+ RB_INIT(&pp->pr_phtree);
/*
* Use the slack between the chunks and the page header
@@ -882,7 +865,6 @@ pool_prime_page(struct pool *pp, caddr_t storage, struct pool_item_header *ph)
LIST_INSERT_HEAD(&pp->pr_emptypages, ph, ph_pagelist);
XSIMPLEQ_INIT(&ph->ph_itemlist);
ph->ph_page = storage;
- ph->ph_pagesize = pp->pr_alloc->pa_pagesz;
ph->ph_nmissing = 0;
if ((pp->pr_roflags & PR_PHINPAGE) == 0)
RB_INSERT(phtree, &pp->pr_phtree, ph);
@@ -1282,7 +1264,7 @@ pool_chk_page(struct pool *pp, struct pool_item_header *ph, int expected)
int n;
const char *label = pp->pr_wchan;
- page = (caddr_t)((u_long)ph & pp->pr_alloc->pa_pagemask);
+ page = (caddr_t)((u_long)ph & pp->pr_pgmask);
if (page != ph->ph_page &&
(pp->pr_roflags & PR_PHINPAGE) != 0) {
printf("%s: ", label);
@@ -1319,8 +1301,7 @@ pool_chk_page(struct pool *pp, struct pool_item_header *ph, int expected)
}
}
#endif /* DIAGNOSTIC */
- page =
- (caddr_t)((u_long)pi & pp->pr_alloc->pa_pagemask);
+ page = (caddr_t)((u_long)pi & pp->pr_pgmask);
if (page == ph->ph_page)
continue;
@@ -1454,7 +1435,7 @@ sysctl_dopool(int *name, u_int namelen, char *oldp, size_t *oldlenp)
mtx_enter(&pp->pr_mtx);
pi.pr_size = pp->pr_size;
- pi.pr_pgsize = pp->pr_alloc->pa_pagesz;
+ pi.pr_pgsize = pp->pr_pgsize;
pi.pr_itemsperpage = pp->pr_itemsperpage;
pi.pr_npages = pp->pr_npages;
pi.pr_minpages = pp->pr_minpages;
@@ -1483,33 +1464,6 @@ done:
/*
* Pool backend allocators.
- *
- * Each pool has a backend allocator that handles allocation, deallocation
- */
-void *pool_page_alloc(struct pool *, int, int *);
-void pool_page_free(struct pool *, void *);
-
-/*
- * safe for interrupts, name preserved for compat this is the default
- * allocator
- */
-struct pool_allocator pool_allocator_nointr = {
- pool_page_alloc, pool_page_free, 0,
-};
-
-/*
- * XXX - we have at least three different resources for the same allocation
- * and each resource can be depleted. First we have the ready elements in
- * the pool. Then we have the resource (typically a vm_map) for this
- * allocator, then we have physical memory. Waiting for any of these can
- * be unnecessary when any other is freed, but the kernel doesn't support
- * sleeping on multiple addresses, so we have to fake. The caller sleeps on
- * the pool (so that we can be awakened when an item is returned to the pool),
- * but we set PA_WANT on the allocator. When a page is returned to
- * the allocator and PA_WANT is set pool_allocator_free will wakeup all
- * sleeping pools belonging to this allocator. (XXX - thundering herd).
- * We also wake up the allocator in case someone without a pool (malloc)
- * is sleeping waiting for this allocator.
*/
void *
@@ -1524,6 +1478,16 @@ pool_allocator_alloc(struct pool *pp, int flags, int *slowdown)
if (waitok)
mtx_enter(&pp->pr_mtx);
+#ifdef DIAGNOSTIC
+ if (v != NULL && ISSET(pp->pr_roflags, PR_PHINPAGE)) {
+ vaddr_t addr = (vaddr_t)v;
+ if ((addr & pp->pr_pgmask) != addr) {
+ panic("%s: %s page address %p isnt aligned to %u",
+ __func__, pp->pr_wchan, v, pp->pr_pgsize);
+ }
+ }
+#endif
+
return (v);
}
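The new DIAGNOSTIC check works because an address is pgsize-aligned exactly when its low offset bits are clear: if, say, pr_pgsize is 4096 and the allocator hands back 0x12345200, then (addr & pr_pgmask) is 0x12345000, which differs from addr, so the in-page header could not later be located by masking and panicking early is the right call.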
@@ -1543,28 +1507,31 @@ pool_page_alloc(struct pool *pp, int flags, int *slowdown)
kd.kd_waitok = (flags & PR_WAITOK);
kd.kd_slowdown = slowdown;
- return (km_alloc(PAGE_SIZE, &kv_page, pp->pr_crange, &kd));
+ return (km_alloc(pp->pr_pgsize, &kv_page, pp->pr_crange, &kd));
}
void
pool_page_free(struct pool *pp, void *v)
{
- km_free(v, PAGE_SIZE, &kv_page, pp->pr_crange);
+ km_free(v, pp->pr_pgsize, &kv_page, pp->pr_crange);
}
void *
pool_large_alloc(struct pool *pp, int flags, int *slowdown)
{
+ struct kmem_va_mode kv = kv_intrsafe;
struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
void *v;
int s;
+ if (ISSET(pp->pr_roflags, PR_PHINPAGE))
+ kv.kv_align = pp->pr_pgsize;
+
kd.kd_waitok = (flags & PR_WAITOK);
kd.kd_slowdown = slowdown;
s = splvm();
- v = km_alloc(pp->pr_alloc->pa_pagesz, &kv_intrsafe, pp->pr_crange,
- &kd);
+ v = km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd);
splx(s);
return (v);
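The local copy of kv_intrsafe with kv_align set to pr_pgsize is what keeps the mask trick valid for multi-page pools: when the header is kept in the page (PR_PHINPAGE), the backing allocation must start on a pr_pgsize boundary, otherwise masking an item pointer with pr_pgmask would land outside the allocation. Pools whose headers live off page skip the alignment constraint and are matched through the RB tree instead.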
@@ -1573,26 +1540,39 @@ pool_large_alloc(struct pool *pp, int flags, int *slowdown)
void
pool_large_free(struct pool *pp, void *v)
{
+ struct kmem_va_mode kv = kv_intrsafe;
int s;
+ if (ISSET(pp->pr_roflags, PR_PHINPAGE))
+ kv.kv_align = pp->pr_pgsize;
+
s = splvm();
- km_free(v, pp->pr_alloc->pa_pagesz, &kv_intrsafe, pp->pr_crange);
+ km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
splx(s);
}
void *
pool_large_alloc_ni(struct pool *pp, int flags, int *slowdown)
{
+ struct kmem_va_mode kv = kv_any;
struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
+ if (ISSET(pp->pr_roflags, PR_PHINPAGE))
+ kv.kv_align = pp->pr_pgsize;
+
kd.kd_waitok = (flags & PR_WAITOK);
kd.kd_slowdown = slowdown;
- return (km_alloc(pp->pr_alloc->pa_pagesz, &kv_any, pp->pr_crange, &kd));
+ return (km_alloc(pp->pr_pgsize, &kv, pp->pr_crange, &kd));
}
void
pool_large_free_ni(struct pool *pp, void *v)
{
- km_free(v, pp->pr_alloc->pa_pagesz, &kv_any, pp->pr_crange);
+ struct kmem_va_mode kv = kv_any;
+
+ if (ISSET(pp->pr_roflags, PR_PHINPAGE))
+ kv.kv_align = pp->pr_pgsize;
+
+ km_free(v, pp->pr_pgsize, &kv, pp->pr_crange);
}