summaryrefslogtreecommitdiffstats
path: root/sys/kern/kern_malloc.c
diff options
context:
space:
mode:
authorart <art@openbsd.org>2008-09-29 12:34:18 +0000
committerart <art@openbsd.org>2008-09-29 12:34:18 +0000
commitd65370a47c86b937a143512262246892a88e8968 (patch)
tree0b365759cf5dda03a8d3b77dcc0468eadf03db66 /sys/kern/kern_malloc.c
parentsort tokens for better readability (diff)
downloadwireguard-openbsd-d65370a47c86b937a143512262246892a88e8968.tar.xz
wireguard-openbsd-d65370a47c86b937a143512262246892a88e8968.zip
Use pools to do allocations for all sizes <= PAGE_SIZE.
This will allow us to escape the limitations of kmem_map. At this moment, the per-type limits are still enforced for all sizes, but we might loosen that limit in the future after some thinking. Original diff from Mickey in kernel/5761, I massaged it a little to obey the per-type limits. miod@ ok
Diffstat (limited to 'sys/kern/kern_malloc.c')
-rw-r--r--sys/kern/kern_malloc.c352
1 files changed, 123 insertions, 229 deletions
diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c
index a0569254c6a..fae3a643117 100644
--- a/sys/kern/kern_malloc.c
+++ b/sys/kern/kern_malloc.c
@@ -1,7 +1,7 @@
-/* $OpenBSD: kern_malloc.c,v 1.74 2008/02/21 10:40:48 kettenis Exp $ */
-/* $NetBSD: kern_malloc.c,v 1.15.4.2 1996/06/13 17:10:56 cgd Exp $ */
+/* $OpenBSD: kern_malloc.c,v 1.75 2008/09/29 12:34:18 art Exp $ */
/*
+ * Copyright (c) 2008 Michael Shalayeff
* Copyright (c) 1987, 1991, 1993
* The Regents of the University of California. All rights reserved.
*
@@ -39,6 +39,7 @@
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/time.h>
+#include <sys/pool.h>
#include <sys/rwlock.h>
#include <uvm/uvm_extern.h>
@@ -74,6 +75,10 @@ u_int nkmempages_min = 0;
#endif
u_int nkmempages_max = 0;
+struct pool mallocpl[MINBUCKET + 16];
+char mallocplnames[MINBUCKET + 16][8]; /* wchan for pool */
+char mallocplwarn[MINBUCKET + 16][32]; /* warning message for hard limit */
+
struct kmembuckets bucket[MINBUCKET + 16];
struct kmemstats kmemstats[M_LAST];
struct kmemusage *kmemusage;
@@ -88,16 +93,6 @@ struct rwlock sysctl_kmemlock = RWLOCK_INITIALIZER("sysctlklk");
#ifdef DIAGNOSTIC
/*
- * This structure provides a set of masks to catch unaligned frees.
- */
-const long addrmask[] = { 0,
- 0x00000001, 0x00000003, 0x00000007, 0x0000000f,
- 0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff,
- 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff,
- 0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff,
-};
-
-/*
* The WEIRD_ADDR is used as known text to copy into free objects so
* that modifications after frees can be detected.
*/
@@ -133,6 +128,46 @@ struct timeval malloc_errintvl = { 5, 0 };
struct timeval malloc_lasterr;
#endif
+void *malloc_page_alloc(struct pool *, int);
+void malloc_page_free(struct pool *, void *);
+struct pool_allocator pool_allocator_malloc = {
+ malloc_page_alloc, malloc_page_free, 0,
+};
+
+void *
+malloc_page_alloc(struct pool *pp, int flags)
+{
+ void *v = uvm_km_getpage(flags & M_NOWAIT? 0 : 1);
+ struct vm_page *pg;
+ paddr_t pa;
+
+ if (!pmap_extract(pmap_kernel(), (vaddr_t)v, &pa))
+ panic("malloc_page_alloc: pmap_extract failed");
+
+ pg = PHYS_TO_VM_PAGE(pa);
+ if (pg == NULL)
+ panic("malloc_page_alloc: no page");
+ pg->wire_count = BUCKETINDX(pp->pr_size);
+
+ return v;
+}
+
+void
+malloc_page_free(struct pool *pp, void *v)
+{
+ struct vm_page *pg;
+ paddr_t pa;
+
+ if (!pmap_extract(pmap_kernel(), (vaddr_t)v, &pa))
+ panic("malloc_page_free: pmap_extract failed");
+
+ pg = PHYS_TO_VM_PAGE(pa);
+ if (pg == NULL)
+ panic("malloc_page_free: no page");
+ pg->wire_count = 1;
+ uvm_km_putpage(v);
+}
+
/*
* Allocate a block of memory
*/
@@ -141,15 +176,9 @@ malloc(unsigned long size, int type, int flags)
{
struct kmembuckets *kbp;
struct kmemusage *kup;
- struct freelist *freep;
- long indx, npg, allocsize;
+ vsize_t indx, allocsize;
int s;
- caddr_t va, cp, savedlist;
-#ifdef DIAGNOSTIC
- int32_t *end, *lp;
- int copysize;
- char *savedtype;
-#endif
+ void *va;
#ifdef KMEMSTATS
struct kmemstats *ksp = &kmemstats[type];
@@ -158,7 +187,7 @@ malloc(unsigned long size, int type, int flags)
#endif
#ifdef MALLOC_DEBUG
- if (debug_malloc(size, type, flags, (void **)&va)) {
+ if (debug_malloc(size, type, flags, &va)) {
if ((flags & M_ZERO) && va != NULL)
memset(va, 0, size);
return (va);
@@ -190,19 +219,10 @@ malloc(unsigned long size, int type, int flags)
ksp->ks_limblocks++;
tsleep(ksp, PSWP+2, memname[type], 0);
}
- ksp->ks_size |= 1 << indx;
#endif
-#ifdef DIAGNOSTIC
- copysize = 1 << indx < MAX_COPY ? 1 << indx : MAX_COPY;
-#endif
- if (kbp->kb_next == NULL) {
- if (size > MAXALLOCSAVE)
- allocsize = round_page(size);
- else
- allocsize = 1 << indx;
- npg = atop(round_page(allocsize));
- va = (caddr_t) uvm_km_kmemalloc(kmem_map, NULL,
- (vsize_t)ptoa(npg),
+ if (size > MAXALLOCSAVE) {
+ allocsize = round_page(size);
+ va = (void *) uvm_km_kmemalloc(kmem_map, NULL, allocsize,
((flags & M_NOWAIT) ? UVM_KMF_NOWAIT : 0) |
((flags & M_CANFAIL) ? UVM_KMF_CANFAIL : 0));
if (va == NULL) {
@@ -220,120 +240,35 @@ malloc(unsigned long size, int type, int flags)
return (NULL);
}
#ifdef KMEMSTATS
- kbp->kb_total += kbp->kb_elmpercl;
+ kbp->kb_total++;
+ kbp->kb_calls++;
#endif
kup = btokup(va);
kup->ku_indx = indx;
- if (allocsize > MAXALLOCSAVE) {
- kup->ku_pagecnt = npg;
-#ifdef KMEMSTATS
- ksp->ks_memuse += allocsize;
-#endif
- goto out;
- }
-#ifdef KMEMSTATS
- kup->ku_freecnt = kbp->kb_elmpercl;
- kbp->kb_totalfree += kbp->kb_elmpercl;
-#endif
- /*
- * Just in case we blocked while allocating memory,
- * and someone else also allocated memory for this
- * bucket, don't assume the list is still empty.
- */
- savedlist = kbp->kb_next;
- kbp->kb_next = cp = va + (npg * PAGE_SIZE) - allocsize;
- for (;;) {
- freep = (struct freelist *)cp;
-#ifdef DIAGNOSTIC
- /*
- * Copy in known text to detect modification
- * after freeing.
- */
- end = (int32_t *)&cp[copysize];
- for (lp = (int32_t *)cp; lp < end; lp++)
- *lp = WEIRD_ADDR;
- freep->type = M_FREE;
-#endif /* DIAGNOSTIC */
- if (cp <= va)
- break;
- cp -= allocsize;
- freep->next = cp;
- }
- freep->next = savedlist;
- if (savedlist == NULL)
- kbp->kb_last = (caddr_t)freep;
- }
- va = kbp->kb_next;
- kbp->kb_next = ((struct freelist *)va)->next;
-#ifdef DIAGNOSTIC
- freep = (struct freelist *)va;
- savedtype = (unsigned)freep->type < M_LAST ?
- memname[freep->type] : "???";
- if (kbp->kb_next) {
- int rv;
- vaddr_t addr = (vaddr_t)kbp->kb_next;
-
- vm_map_lock(kmem_map);
- rv = uvm_map_checkprot(kmem_map, addr,
- addr + sizeof(struct freelist), VM_PROT_WRITE);
- vm_map_unlock(kmem_map);
-
- if (!rv) {
- printf("%s %d of object %p size 0x%lx %s %s (invalid addr %p)\n",
- "Data modified on freelist: word",
- (int32_t *)&kbp->kb_next - (int32_t *)kbp, va, size,
- "previous type", savedtype, kbp->kb_next);
- kbp->kb_next = NULL;
- }
- }
-
- /* Fill the fields that we've used with WEIRD_ADDR */
-#if BYTE_ORDER == BIG_ENDIAN
- freep->type = WEIRD_ADDR >> 16;
-#endif
-#if BYTE_ORDER == LITTLE_ENDIAN
- freep->type = (short)WEIRD_ADDR;
-#endif
- end = (int32_t *)&freep->next +
- (sizeof(freep->next) / sizeof(int32_t));
- for (lp = (int32_t *)&freep->next; lp < end; lp++)
- *lp = WEIRD_ADDR;
-
- /* and check that the data hasn't been modified. */
- end = (int32_t *)&va[copysize];
- for (lp = (int32_t *)va; lp < end; lp++) {
- if (*lp == WEIRD_ADDR)
- continue;
- printf("%s %d of object %p size 0x%lx %s %s (0x%x != 0x%x)\n",
- "Data modified on freelist: word", lp - (int32_t *)va,
- va, size, "previous type", savedtype, *lp, WEIRD_ADDR);
- break;
+ kup->ku_pagecnt = atop(allocsize);
+ } else {
+ allocsize = mallocpl[indx].pr_size;
+ va = pool_get(&mallocpl[indx], PR_LIMITFAIL |
+ (flags & M_NOWAIT ? 0 : PR_WAITOK));
+ if (!va && (flags & (M_NOWAIT|M_CANFAIL)) == 0)
+ panic("malloc: out of space in kmem pool");
}
- freep->spare0 = 0;
-#endif /* DIAGNOSTIC */
#ifdef KMEMSTATS
- kup = btokup(va);
- if (kup->ku_indx != indx)
- panic("malloc: wrong bucket");
- if (kup->ku_freecnt == 0)
- panic("malloc: lost data");
- kup->ku_freecnt--;
- kbp->kb_totalfree--;
- ksp->ks_memuse += 1 << indx;
-out:
- kbp->kb_calls++;
- ksp->ks_inuse++;
- ksp->ks_calls++;
- if (ksp->ks_memuse > ksp->ks_maxused)
- ksp->ks_maxused = ksp->ks_memuse;
-#else
-out:
+ if (va) {
+ ksp->ks_memuse += allocsize;
+ if (ksp->ks_memuse > ksp->ks_maxused)
+ ksp->ks_maxused = ksp->ks_memuse;
+ ksp->ks_size |= 1 << indx;
+ ksp->ks_inuse++;
+ ksp->ks_calls++;
+ }
#endif
splx(s);
if ((flags & M_ZERO) && va != NULL)
memset(va, 0, size);
+
return (va);
}
@@ -345,14 +280,10 @@ free(void *addr, int type)
{
struct kmembuckets *kbp;
struct kmemusage *kup;
- struct freelist *freep;
+ struct vm_page *pg;
+ paddr_t pa;
long size;
int s;
-#ifdef DIAGNOSTIC
- caddr_t cp;
- int32_t *end, *lp;
- long alloc, copysize;
-#endif
#ifdef KMEMSTATS
struct kmemstats *ksp = &kmemstats[type];
#endif
@@ -362,93 +293,47 @@ free(void *addr, int type)
return;
#endif
-#ifdef DIAGNOSTIC
- if (addr < (void *)kmembase || addr >= (void *)kmemlimit)
- panic("free: non-malloced addr %p type %s", addr,
- memname[type]);
-#endif
-
- kup = btokup(addr);
- size = 1 << kup->ku_indx;
- kbp = &bucket[kup->ku_indx];
s = splvm();
+ if (addr >= (void *)kmembase && addr < (void *)kmemlimit) {
+ kup = btokup(addr);
+ kbp = &bucket[kup->ku_indx];
+ size = ptoa(kup->ku_pagecnt);
#ifdef DIAGNOSTIC
- /*
- * Check for returns of data that do not point to the
- * beginning of the allocation.
- */
- if (size > PAGE_SIZE)
- alloc = addrmask[BUCKETINDX(PAGE_SIZE)];
- else
- alloc = addrmask[kup->ku_indx];
- if (((u_long)addr & alloc) != 0)
- panic("free: unaligned addr %p, size %ld, type %s, mask %ld",
- addr, size, memname[type], alloc);
+ if ((vaddr_t)addr != round_page((vaddr_t)addr))
+ panic("free: unaligned addr %p, size %ld, type %s",
+ addr, size, memname[type]);
#endif /* DIAGNOSTIC */
- if (size > MAXALLOCSAVE) {
- uvm_km_free(kmem_map, (vaddr_t)addr, ptoa(kup->ku_pagecnt));
+ uvm_km_free(kmem_map, (vaddr_t)addr, size);
#ifdef KMEMSTATS
- size = kup->ku_pagecnt << PGSHIFT;
- ksp->ks_memuse -= size;
kup->ku_indx = 0;
kup->ku_pagecnt = 0;
- if (ksp->ks_memuse + size >= ksp->ks_limit &&
- ksp->ks_memuse < ksp->ks_limit)
- wakeup(ksp);
- ksp->ks_inuse--;
- kbp->kb_total -= 1;
-#endif
- splx(s);
- return;
- }
- freep = (struct freelist *)addr;
+ kbp->kb_total--;
+#endif
+ } else {
+ if (!pmap_extract(pmap_kernel(), (vaddr_t)addr, &pa))
+ panic("free: pmap_extract failed");
+ pg = PHYS_TO_VM_PAGE(pa);
+ if (pg == NULL)
+ panic("free: no page");
#ifdef DIAGNOSTIC
- /*
- * Check for multiple frees. Use a quick check to see if
- * it looks free before laboriously searching the freelist.
- */
- if (freep->spare0 == WEIRD_ADDR) {
- for (cp = kbp->kb_next; cp;
- cp = ((struct freelist *)cp)->next) {
- if (addr != cp)
- continue;
- printf("multiply freed item %p\n", addr);
- panic("free: duplicated free");
- }
+ if (pg->pg_flags & PQ_FREE)
+ panic("free: page %p is free", pg);
+ if (pg->wire_count < MINBUCKET ||
+ (1 << pg->wire_count) > MAXALLOCSAVE)
+ panic("free: invalid page bucket %d", pg->wire_count);
+#endif
+ size = mallocpl[pg->wire_count].pr_size;
+ pool_put(&mallocpl[pg->wire_count], addr);
}
- /*
- * Copy in known text to detect modification after freeing
- * and to make it look free. Also, save the type being freed
- * so we can list likely culprit if modification is detected
- * when the object is reallocated.
- */
- copysize = size < MAX_COPY ? size : MAX_COPY;
- end = (int32_t *)&((caddr_t)addr)[copysize];
- for (lp = (int32_t *)addr; lp < end; lp++)
- *lp = WEIRD_ADDR;
- freep->type = type;
-#endif /* DIAGNOSTIC */
+
#ifdef KMEMSTATS
- kup->ku_freecnt++;
- if (kup->ku_freecnt >= kbp->kb_elmpercl) {
- if (kup->ku_freecnt > kbp->kb_elmpercl)
- panic("free: multiple frees");
- else if (kbp->kb_totalfree > kbp->kb_highwat)
- kbp->kb_couldfree++;
- }
- kbp->kb_totalfree++;
+ ksp->ks_inuse--;
ksp->ks_memuse -= size;
if (ksp->ks_memuse + size >= ksp->ks_limit &&
ksp->ks_memuse < ksp->ks_limit)
- wakeup(ksp);
- ksp->ks_inuse--;
+ wakeup(ksp); /* unnecessary for pool, whatever */
#endif
- if (kbp->kb_next == NULL)
- kbp->kb_next = addr;
- else
- ((struct freelist *)kbp->kb_last)->next = addr;
- freep->next = NULL;
- kbp->kb_last = addr;
+
splx(s);
}
@@ -507,9 +392,7 @@ void
kmeminit(void)
{
vaddr_t base, limit;
-#ifdef KMEMSTATS
- long indx;
-#endif
+ int i;
#ifdef DIAGNOSTIC
if (sizeof(struct freelist) > (1 << MINBUCKET))
@@ -529,16 +412,27 @@ kmeminit(void)
kmemlimit = (char *)limit;
kmemusage = (struct kmemusage *) uvm_km_zalloc(kernel_map,
(vsize_t)(nkmempages * sizeof(struct kmemusage)));
+
+ /*
+ * init all the sub-page pools
+ */
+ for (i = MINBUCKET; (1 << i) <= MAXALLOCSAVE; i++) {
+ snprintf(mallocplnames[i], sizeof(mallocplnames[i]),
+ "kmem%d", i);
+ pool_init(&mallocpl[i], 1 << i, 1 << i, 0, PR_LIMITFAIL,
+ mallocplnames[i], &pool_allocator_malloc);
+ }
+
#ifdef KMEMSTATS
- for (indx = 0; indx < MINBUCKET + 16; indx++) {
- if (1 << indx >= PAGE_SIZE)
- bucket[indx].kb_elmpercl = 1;
+ for (i = 0; i < MINBUCKET + 16; i++) {
+ if (1 << i >= PAGE_SIZE)
+ bucket[i].kb_elmpercl = 1;
else
- bucket[indx].kb_elmpercl = PAGE_SIZE / (1 << indx);
- bucket[indx].kb_highwat = 5 * bucket[indx].kb_elmpercl;
+ bucket[i].kb_elmpercl = PAGE_SIZE / (1 << i);
+ bucket[i].kb_highwat = 5 * bucket[i].kb_elmpercl;
}
- for (indx = 0; indx < M_LAST; indx++)
- kmemstats[indx].ks_limit = nkmempages * PAGE_SIZE * 6 / 10;
+ for (i = 0; i < M_LAST; i++)
+ kmemstats[i].ks_limit = nkmempages * PAGE_SIZE * 6 / 10;
#endif
#ifdef MALLOC_DEBUG
debug_malloc_init();
@@ -579,7 +473,6 @@ sysctl_malloc(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
case KERN_MALLOC_BUCKET:
bcopy(&bucket[BUCKETINDX(name[1])], &kb, sizeof(kb));
- kb.kb_next = kb.kb_last = 0;
return (sysctl_rdstruct(oldp, oldlenp, newp, &kb, sizeof(kb)));
case KERN_MALLOC_KMEMSTATS:
#ifdef KMEMSTATS
@@ -607,8 +500,9 @@ sysctl_malloc(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
}
memall = malloc(totlen + M_LAST, M_SYSCTL,
M_WAITOK|M_ZERO);
+ bzero(memall, totlen + M_LAST);
for (siz = 0, i = 0; i < M_LAST; i++) {
- snprintf(memall + siz,
+ snprintf(memall + siz,
totlen + M_LAST - siz,
"%s,", memname[i] ? memname[i] : "");
siz += strlen(memall + siz);
@@ -666,7 +560,7 @@ malloc_printit(int (*pr)(const char *, ...))
(*pr)("%15s %5ld %6ldK %7ldK %6ldK %9ld %8d %8d\n",
memname[i], km->ks_inuse, km->ks_memuse / 1024,
- km->ks_maxused / 1024, km->ks_limit / 1024,
+ km->ks_maxused / 1024, km->ks_limit / 1024,
km->ks_calls, km->ks_limblocks, km->ks_mapblocks);
}
#else