author     1999-02-26 01:30:10 +0000
committer  1999-02-26 01:30:10 +0000
commit     cd7ee8acd30fe8d4b178a6bcda689f469732e4bc (patch)
tree       00ca09c99c7798adde771b6c8afd33bbf1e14fc0
parent     convert to mdoc, document changes from db 1.8.6 (diff)
Import of uvm from NetBSD. Some local changes, some code disabled
46 files changed, 23052 insertions, 0 deletions
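The amap implementation imported below keeps its in-use slots in a contig-packed array (am_slots) with back pointers (am_bckptr), as the comments in uvm_amap.h describe and as amap_add()/amap_unadd() in uvm_amap_i.h implement. What follows is a minimal, self-contained sketch of just that bookkeeping, kept in the same C style as the import; the toy_* names and the stand-in anon type are illustrative only and are not part of the imported code.

#include <assert.h>
#include <stddef.h>

#define NSLOT 8				/* toy amap with 8 slots */

struct toy_anon { int dummy; };

struct toy_amap {
	struct toy_anon *am_anon[NSLOT];/* anon per slot, or NULL */
	int am_slots[NSLOT];		/* contig-packed list of used slots */
	int am_bckptr[NSLOT];		/* slot -> its index in am_slots */
	int am_nused;			/* number of entries in am_slots */
};

/* add an anon at "slot" (mirrors the !replace path of amap_add) */
static void
toy_add(struct toy_amap *amap, int slot, struct toy_anon *anon)
{
	assert(amap->am_anon[slot] == NULL);
	amap->am_bckptr[slot] = amap->am_nused;
	amap->am_slots[amap->am_nused++] = slot;
	amap->am_anon[slot] = anon;
}

/* remove the anon at "slot" (mirrors amap_unadd) */
static void
toy_unadd(struct toy_amap *amap, int slot)
{
	int ptr = amap->am_bckptr[slot];

	assert(amap->am_anon[slot] != NULL);
	amap->am_anon[slot] = NULL;
	/* keep am_slots contig-packed: move the last entry into the hole */
	if (ptr != amap->am_nused - 1) {
		amap->am_slots[ptr] = amap->am_slots[amap->am_nused - 1];
		amap->am_bckptr[amap->am_slots[ptr]] = ptr;
	}
	amap->am_nused--;
}

int
main(void)
{
	struct toy_amap amap = { .am_nused = 0 };
	struct toy_anon a, b;
	int i, slot;

	toy_add(&amap, 1, &a);
	toy_add(&amap, 3, &b);
	toy_unadd(&amap, 1);		/* slot 3 gets moved into the hole */

	/* the invariant documented in uvm_amap.h: am_slots[am_bckptr[X]] == X */
	for (i = 0; i < amap.am_nused; i++) {
		slot = amap.am_slots[i];
		assert(amap.am_anon[slot] != NULL);
		assert(amap.am_bckptr[slot] == i);
	}
	return 0;
}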
diff --git a/sys/uvm/uvm.h b/sys/uvm/uvm.h new file mode 100644 index 00000000000..4f4d5164527 --- /dev/null +++ b/sys/uvm/uvm.h @@ -0,0 +1,181 @@ +/* $NetBSD: uvm.h,v 1.13 1998/10/11 22:59:53 chuck Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ + +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * from: Id: uvm.h,v 1.1.2.14 1998/02/02 20:07:19 chuck Exp + */ + +#ifndef _UVM_UVM_H_ +#define _UVM_UVM_H_ + +#include <uvm/uvm_extern.h> + +#include <uvm/uvm_stat.h> + +/* + * pull in prototypes + */ + +#include <uvm/uvm_amap.h> +#include <uvm/uvm_aobj.h> +#include <uvm/uvm_fault.h> +#include <uvm/uvm_glue.h> +#include <uvm/uvm_km.h> +#include <uvm/uvm_loan.h> +#include <uvm/uvm_map.h> +#include <uvm/uvm_object.h> +#include <uvm/uvm_page.h> +#include <uvm/uvm_pager.h> +#include <uvm/uvm_pdaemon.h> +#include <uvm/uvm_swap.h> + +/* + * pull in VM_NFREELIST + */ +#include <machine/vmparam.h> + +/* + * uvm structure (vm global state: collected in one structure for ease + * of reference...) 
+ */ + +struct uvm { + /* vm_page related parameters */ + /* vm_page queues */ + struct pglist page_free[VM_NFREELIST]; /* unallocated pages */ + struct pglist page_active; /* allocated pages, in use */ + struct pglist page_inactive_swp;/* pages inactive (reclaim or free) */ + struct pglist page_inactive_obj;/* pages inactive (reclaim or free) */ + simple_lock_data_t pageqlock; /* lock for active/inactive page q */ + simple_lock_data_t fpageqlock; /* lock for free page q */ + /* page daemon trigger */ + int pagedaemon; /* daemon sleeps on this */ + struct proc *pagedaemon_proc; /* daemon's pid */ + simple_lock_data_t pagedaemon_lock; + /* page hash */ + struct pglist *page_hash; /* page hash table (vp/off->page) */ + int page_nhash; /* number of buckets */ + int page_hashmask; /* hash mask */ + simple_lock_data_t hashlock; /* lock on page_hash array */ + /* anon stuff */ + struct vm_anon *afree; /* anon free list */ + simple_lock_data_t afreelock; /* lock on anon free list */ + + /* static kernel map entry pool */ + vm_map_entry_t kentry_free; /* free page pool */ + simple_lock_data_t kentry_lock; + + /* aio_done is locked by uvm.pagedaemon_lock and splbio! */ + struct uvm_aiohead aio_done; /* done async i/o reqs */ + + /* pager VM area bounds */ + vaddr_t pager_sva; /* start of pager VA area */ + vaddr_t pager_eva; /* end of pager VA area */ + + /* kernel object: to support anonymous pageable kernel memory */ + struct uvm_object *kernel_object; +}; + +extern struct uvm uvm; + +/* + * historys + */ + +UVMHIST_DECL(maphist); +UVMHIST_DECL(pdhist); + +/* + * vm_map_entry etype bits: + */ + +#define UVM_ET_OBJ 0x01 /* it is a uvm_object */ +#define UVM_ET_SUBMAP 0x02 /* it is a vm_map submap */ +#define UVM_ET_COPYONWRITE 0x04 /* copy_on_write */ +#define UVM_ET_NEEDSCOPY 0x08 /* needs_copy */ + +#define UVM_ET_ISOBJ(E) (((E)->etype & UVM_ET_OBJ) != 0) +#define UVM_ET_ISSUBMAP(E) (((E)->etype & UVM_ET_SUBMAP) != 0) +#define UVM_ET_ISCOPYONWRITE(E) (((E)->etype & UVM_ET_COPYONWRITE) != 0) +#define UVM_ET_ISNEEDSCOPY(E) (((E)->etype & UVM_ET_NEEDSCOPY) != 0) + +/* + * macros + */ + +/* + * UVM_UNLOCK_AND_WAIT: atomic unlock+wait... front end for the + * (poorly named) thread_sleep_msg function. + */ + +#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) + +#define UVM_UNLOCK_AND_WAIT(event,lock,intr,msg, timo) \ + thread_sleep_msg(event,lock,intr,msg, timo) + +#else + +#define UVM_UNLOCK_AND_WAIT(event,lock,intr,msg, timo) \ + thread_sleep_msg(event,NULL,intr,msg, timo) + +#endif + +/* + * UVM_PAGE_OWN: track page ownership (only if UVM_PAGE_TRKOWN) + */ + +#if defined(UVM_PAGE_TRKOWN) + +#define UVM_PAGE_OWN(PG, TAG) uvm_page_own(PG, TAG) + +#else /* UVM_PAGE_TRKOWN */ + +#define UVM_PAGE_OWN(PG, TAG) /* nothing */ + +#endif /* UVM_PAGE_TRKOWN */ + +/* + * pull in inlines + */ + +#include <uvm/uvm_amap_i.h> +#include <uvm/uvm_fault_i.h> +#include <uvm/uvm_map_i.h> +#include <uvm/uvm_page_i.h> +#include <uvm/uvm_pager_i.h> + +#endif /* _UVM_UVM_H_ */ diff --git a/sys/uvm/uvm_amap.c b/sys/uvm/uvm_amap.c new file mode 100644 index 00000000000..8685f643392 --- /dev/null +++ b/sys/uvm/uvm_amap.c @@ -0,0 +1,1066 @@ +/* $NetBSD: uvm_amap.c,v 1.19 1999/01/28 14:46:27 chuck Exp $ */ + +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * uvm_amap.c: amap operations + */ + +/* + * this file contains functions that perform operations on amaps. see + * uvm_amap.h for a brief explanation of the role of amaps in uvm. + */ + +#undef UVM_AMAP_INLINE /* enable/disable amap inlines */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/malloc.h> +#include <sys/pool.h> + +#include <vm/vm.h> +#include <vm/vm_page.h> +#include <vm/vm_kern.h> + +#define UVM_AMAP_C /* ensure disabled inlines are in */ +#include <uvm/uvm.h> +#include <uvm/uvm_swap.h> + +/* + * pool for allocation of vm_map structures. note that the pool has + * its own simplelock for its protection. also note that in order to + * avoid an endless loop, the amap pool's allocator cannot allocate + * memory from an amap (it currently goes through the kernel uobj, so + * we are ok). + */ + +struct pool uvm_amap_pool; + +/* + * local functions + */ + +static struct vm_amap *amap_alloc1 __P((int, int, int)); + +#ifdef UVM_AMAP_PPREF +/* + * what is ppref? ppref is an _optional_ amap feature which is used + * to keep track of reference counts on a per-page basis. it is enabled + * when UVM_AMAP_PPREF is defined. + * + * when enabled, an array of ints is allocated for the pprefs. this + * array is allocated only when a partial reference is added to the + * map (either by unmapping part of the amap, or gaining a reference + * to only a part of an amap). if the malloc of the array fails + * (M_NOWAIT), then we set the array pointer to PPREF_NONE to indicate + * that we tried to do ppref's but couldn't alloc the array so just + * give up (after all, this is an optional feature!). + * + * the array is divided into page sized "chunks." for chunks of length 1, + * the chunk reference count plus one is stored in that chunk's slot. + * for chunks of length > 1 the first slot contains (the reference count + * plus one) * -1. [the negative value indicates that the length is + * greater than one.] the second slot of the chunk contains the length + * of the chunk. 
here is an example: + * + * actual REFS: 2 2 2 2 3 1 1 0 0 0 4 4 0 1 1 1 + * ppref: -3 4 x x 4 -2 2 -1 3 x -5 2 1 -2 3 x + * <----------><-><----><-------><----><-><-------> + * (x = don't care) + * + * this allows us to allow one int to contain the ref count for the whole + * chunk. note that the "plus one" part is needed because a reference + * count of zero is neither positive or negative (need a way to tell + * if we've got one zero or a bunch of them). + * + * here are some in-line functions to help us. + */ + +static __inline void pp_getreflen __P((int *, int, int *, int *)); +static __inline void pp_setreflen __P((int *, int, int, int)); + +/* + * pp_getreflen: get the reference and length for a specific offset + * + * => ppref's amap must be locked + */ +static __inline void +pp_getreflen(ppref, offset, refp, lenp) + int *ppref, offset, *refp, *lenp; +{ + + if (ppref[offset] > 0) { /* chunk size must be 1 */ + *refp = ppref[offset] - 1; /* don't forget to adjust */ + *lenp = 1; + } else { + *refp = (ppref[offset] * -1) - 1; + *lenp = ppref[offset+1]; + } +} + +/* + * pp_setreflen: set the reference and length for a specific offset + * + * => ppref's amap must be locked + */ +static __inline void +pp_setreflen(ppref, offset, ref, len) + int *ppref, offset, ref, len; +{ + if (len == 1) { + ppref[offset] = ref + 1; + } else { + ppref[offset] = (ref + 1) * -1; + ppref[offset+1] = len; + } +} +#endif + +/* + * amap_init: called at boot time to init global amap data structures + */ + +void +amap_init() + +{ + /* + * Initialize the vm_amap pool. + */ + pool_init(&uvm_amap_pool, sizeof(struct vm_amap), 0, 0, 0, + "amappl", 0, pool_page_alloc_nointr, pool_page_free_nointr, + M_UVMAMAP); +} + +/* + * amap_alloc1: internal function that allocates an amap, but does not + * init the overlay. + * + * => lock on returned amap is init'd + */ +static inline struct vm_amap * +amap_alloc1(slots, padslots, waitf) + int slots, padslots, waitf; +{ + struct vm_amap *amap; + int totalslots = slots + padslots; + + amap = pool_get(&uvm_amap_pool, (waitf == M_WAITOK) ? 
PR_WAITOK : 0); + if (amap == NULL) + return(NULL); + + simple_lock_init(&amap->am_l); + amap->am_ref = 1; + amap->am_flags = 0; +#ifdef UVM_AMAP_PPREF + amap->am_ppref = NULL; +#endif + amap->am_maxslot = totalslots; + amap->am_nslot = slots; + amap->am_nused = 0; + MALLOC(amap->am_slots, int *, totalslots * sizeof(int), M_UVMAMAP, waitf); + if (amap->am_slots) { + MALLOC(amap->am_bckptr, int *, totalslots * sizeof(int), M_UVMAMAP, waitf); + if (amap->am_bckptr) { + MALLOC(amap->am_anon, struct vm_anon **, + totalslots * sizeof(struct vm_anon *), M_UVMAMAP, waitf); + } + } + + if (amap->am_anon) + return(amap); + + if (amap->am_slots) { + FREE(amap->am_slots, M_UVMAMAP); + if (amap->am_bckptr) + FREE(amap->am_bckptr, M_UVMAMAP); + } + pool_put(&uvm_amap_pool, amap); + return (NULL); +} + +/* + * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM + * + * => caller should ensure sz is a multiple of PAGE_SIZE + * => reference count to new amap is set to one + * => new amap is returned unlocked + */ + +struct vm_amap * +amap_alloc(sz, padsz, waitf) + vaddr_t sz, padsz; + int waitf; +{ + struct vm_amap *amap; + int slots, padslots; + UVMHIST_FUNC("amap_alloc"); UVMHIST_CALLED(maphist); + + AMAP_B2SLOT(slots, sz); /* load slots */ + AMAP_B2SLOT(padslots, padsz); + + amap = amap_alloc1(slots, padslots, waitf); + if (amap) + bzero(amap->am_anon, (slots + padslots) * sizeof(struct vm_anon *)); + + UVMHIST_LOG(maphist,"<- done, amap = 0x%x, sz=%d", amap, sz, 0, 0); + return(amap); +} + + +/* + * amap_free: free an amap + * + * => the amap must be locked (mainly for simplelock accounting) + * => the amap should have a zero reference count and be empty + */ +void +amap_free(amap) + struct vm_amap *amap; +{ + UVMHIST_FUNC("amap_free"); UVMHIST_CALLED(maphist); + +#ifdef DIAGNOSTIC + if (amap->am_ref || amap->am_nused) + panic("amap_free"); +#endif + + FREE(amap->am_slots, M_UVMAMAP); + FREE(amap->am_bckptr, M_UVMAMAP); + FREE(amap->am_anon, M_UVMAMAP); +#ifdef UVM_AMAP_PPREF + if (amap->am_ppref && amap->am_ppref != PPREF_NONE) + FREE(amap->am_ppref, M_UVMAMAP); +#endif + amap_unlock(amap); /* mainly for lock debugging */ + pool_put(&uvm_amap_pool, amap); + + UVMHIST_LOG(maphist,"<- done, freed amap = 0x%x", amap, 0, 0, 0); +} + +/* + * amap_extend: extend the size of an amap (if needed) + * + * => called from uvm_map when we want to extend an amap to cover + * a new mapping (rather than allocate a new one) + * => amap should be unlocked (we will lock it) + * => to safely extend an amap it should have a reference count of + * one (thus it can't be shared) + * => XXXCDC: needs a waitflag or failure return value? + * => XXXCDC: support padding at this level? + */ +void +amap_extend(entry, addsize) + vm_map_entry_t entry; + vsize_t addsize; +{ + struct vm_amap *amap = entry->aref.ar_amap; + int slotoff = entry->aref.ar_pageoff; + int slotmapped, slotadd, slotneed; +#ifdef UVM_AMAP_PPREF + int *newppref, *oldppref; +#endif + u_int *newsl, *newbck, *oldsl, *oldbck; + struct vm_anon **newover, **oldover; + int slotadded; + UVMHIST_FUNC("amap_extend"); UVMHIST_CALLED(maphist); + + UVMHIST_LOG(maphist, " (entry=0x%x, addsize=0x%x)", entry,addsize,0,0); + + /* + * first, determine how many slots we need in the amap. don't + * forget that ar_pageoff could be non-zero: this means that + * there are some unused slots before us in the amap. + */ + + amap_lock(amap); /* lock! 
*/ + + AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */ + AMAP_B2SLOT(slotadd, addsize); /* slots to add */ + slotneed = slotoff + slotmapped + slotadd; + + /* + * case 1: we already have enough slots in the map and thus + * only need to bump the reference counts on the slots we are + * adding. + */ + + if (amap->am_nslot >= slotneed) { +#ifdef UVM_AMAP_PPREF + if (amap->am_ppref && amap->am_ppref != PPREF_NONE) { + amap_pp_adjref(amap, slotoff + slotmapped, addsize, 1); + } +#endif + amap_unlock(amap); + UVMHIST_LOG(maphist,"<- done (case 1), amap = 0x%x, sltneed=%d", + amap, slotneed, 0, 0); + return; /* done! */ + } + + /* + * case 2: we pre-allocated slots for use and we just need to + * bump nslot up to take account for these slots. + */ + if (amap->am_maxslot >= slotneed) { +#ifdef UVM_AMAP_PPREF + if (amap->am_ppref && amap->am_ppref != PPREF_NONE) { + if ((slotoff + slotmapped) < amap->am_nslot) + amap_pp_adjref(amap, slotoff + slotmapped, + (amap->am_nslot - (slotoff + slotmapped)) << + PAGE_SHIFT, 1); + pp_setreflen(amap->am_ppref, amap->am_nslot, 1, + slotneed - amap->am_nslot); + } +#endif + amap->am_nslot = slotneed; + amap_unlock(amap); + /* + * no need to zero am_anon since that was done at + * alloc time and we never shrink an allocation. + */ + UVMHIST_LOG(maphist,"<- done (case 2), amap = 0x%x, slotneed=%d", + amap, slotneed, 0, 0); + return; + } + + /* + * case 3: we need to malloc a new amap and copy all the amap + * data over from old amap to the new one. + * + * XXXCDC: could we take advantage of a kernel realloc()? + */ + + amap_unlock(amap); /* unlock in case we sleep in malloc */ +#ifdef UVM_AMAP_PPREF + newppref = NULL; + if (amap->am_ppref && amap->am_ppref != PPREF_NONE) { + MALLOC(newppref, int *, slotneed * sizeof(int), M_UVMAMAP, + M_NOWAIT); + if (newppref == NULL) { + /* give up if malloc fails */ + FREE(amap->am_ppref, M_UVMAMAP); + amap->am_ppref = PPREF_NONE; + } + } +#endif + MALLOC(newsl, int *, slotneed * sizeof(int), M_UVMAMAP, M_WAITOK); + MALLOC(newbck, int *, slotneed * sizeof(int), M_UVMAMAP, M_WAITOK); + MALLOC(newover, struct vm_anon **, slotneed * sizeof(struct vm_anon *), + M_UVMAMAP, M_WAITOK); + amap_lock(amap); /* re-lock! */ + +#ifdef DIAGNOSTIC + if (amap->am_maxslot >= slotneed) + panic("amap_extend: amap changed during malloc"); +#endif + + /* + * now copy everything over to new malloc'd areas... + */ + + slotadded = slotneed - amap->am_nslot; + + /* do am_slots */ + oldsl = amap->am_slots; + bcopy(oldsl, newsl, sizeof(int) * amap->am_nused); + amap->am_slots = newsl; + + /* do am_anon */ + oldover = amap->am_anon; + bcopy(oldover, newover, sizeof(struct vm_anon *) * amap->am_nslot); + bzero(newover + amap->am_nslot, sizeof(struct vm_anon *) * slotadded); + amap->am_anon = newover; + + /* do am_bckptr */ + oldbck = amap->am_bckptr; + bcopy(oldbck, newbck, sizeof(int) * amap->am_nslot); + bzero(newbck + amap->am_nslot, sizeof(int) * slotadded); /* XXX: needed? 
*/ + amap->am_bckptr = newbck; + +#ifdef UVM_AMAP_PPREF + /* do ppref */ + oldppref = amap->am_ppref; + if (newppref) { + bcopy(oldppref, newppref, sizeof(int) * amap->am_nslot); + bzero(newppref + amap->am_nslot, sizeof(int) * slotadded); + amap->am_ppref = newppref; + if ((slotoff + slotmapped) < amap->am_nslot) + amap_pp_adjref(amap, slotoff + slotmapped, + (amap->am_nslot - (slotoff + slotmapped)) << + PAGE_SHIFT, 1); + pp_setreflen(newppref, amap->am_nslot, 1, slotadded); + } +#endif + + /* update master values */ + amap->am_nslot = slotneed; + amap->am_maxslot = slotneed; + + /* unlock */ + amap_unlock(amap); + + /* and free */ + FREE(oldsl, M_UVMAMAP); + FREE(oldbck, M_UVMAMAP); + FREE(oldover, M_UVMAMAP); +#ifdef UVM_AMAP_PPREF + if (oldppref && oldppref != PPREF_NONE) + FREE(oldppref, M_UVMAMAP); +#endif + UVMHIST_LOG(maphist,"<- done (case 3), amap = 0x%x, slotneed=%d", + amap, slotneed, 0, 0); +} + +/* + * amap_share_protect: change protection of anons in a shared amap + * + * for shared amaps, given the current data structure layout, it is + * not possible for us to directly locate all maps referencing the + * shared anon (to change the protection). in order to protect data + * in shared maps we use pmap_page_protect(). [this is useful for IPC + * mechanisms like map entry passing that may want to write-protect + * all mappings of a shared amap.] we traverse am_anon or am_slots + * depending on the current state of the amap. + * + * => entry's map and amap must be locked by the caller + */ +void +amap_share_protect(entry, prot) + vm_map_entry_t entry; + vm_prot_t prot; +{ + struct vm_amap *amap = entry->aref.ar_amap; + int slots, lcv, slot, stop; + + AMAP_B2SLOT(slots, (entry->end - entry->start)); + stop = entry->aref.ar_pageoff + slots; + + if (slots < amap->am_nused) { + /* cheaper to traverse am_anon */ + for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) { + if (amap->am_anon[lcv] == NULL) + continue; + if (amap->am_anon[lcv]->u.an_page != NULL) + pmap_page_protect( + PMAP_PGARG(amap->am_anon[lcv]->u.an_page), + prot); + } + return; + } + + /* cheaper to traverse am_slots */ + for (lcv = 0 ; lcv < amap->am_nused ; lcv++) { + slot = amap->am_slots[lcv]; + if (slot < entry->aref.ar_pageoff || slot >= stop) + continue; + if (amap->am_anon[slot]->u.an_page != NULL) + pmap_page_protect( + PMAP_PGARG(amap->am_anon[slot]->u.an_page), prot); + } + return; +} + +/* + * amap_wipeout: wipeout all anon's in an amap; then free the amap! + * + * => called from amap_unref when the final reference to an amap is + * discarded (i.e. when reference count == 1) + * => the amap should be locked (by the caller) + */ + +void +amap_wipeout(amap) + struct vm_amap *amap; +{ + int lcv, slot; + struct vm_anon *anon; + UVMHIST_FUNC("amap_wipeout"); UVMHIST_CALLED(maphist); + UVMHIST_LOG(maphist,"(amap=0x%x)", amap, 0,0,0); + + for (lcv = 0 ; lcv < amap->am_nused ; lcv++) { + int refs; + + slot = amap->am_slots[lcv]; + anon = amap->am_anon[slot]; + + if (anon == NULL || anon->an_ref == 0) + panic("amap_wipeout: corrupt amap"); + + simple_lock(&anon->an_lock); /* lock anon */ + + UVMHIST_LOG(maphist," processing anon 0x%x, ref=%d", anon, + anon->an_ref, 0, 0); + + refs = --anon->an_ref; + simple_unlock(&anon->an_lock); + if (refs == 0) { + /* + * we had the last reference to a vm_anon. free it. + */ + uvm_anfree(anon); + } + } + + /* + * now we free the map + */ + + amap->am_ref = 0; /* ... 
was one */ + amap->am_nused = 0; + amap_free(amap); /* will unlock and free amap */ + UVMHIST_LOG(maphist,"<- done!", 0,0,0,0); +} + +/* + * amap_copy: ensure that a map entry's "needs_copy" flag is false + * by copying the amap if necessary. + * + * => an entry with a null amap pointer will get a new (blank) one. + * => the map that the map entry belongs to must be locked by caller. + * => the amap currently attached to "entry" (if any) must be unlocked. + * => if canchunk is true, then we may clip the entry into a chunk + * => "startva" and "endva" are used only if canchunk is true. they are + * used to limit chunking (e.g. if you have a large space that you + * know you are going to need to allocate amaps for, there is no point + * in allowing that to be chunked) + */ + +void +amap_copy(map, entry, waitf, canchunk, startva, endva) + vm_map_t map; + vm_map_entry_t entry; + int waitf; + boolean_t canchunk; + vaddr_t startva, endva; +{ + struct vm_amap *amap, *srcamap; + int slots, lcv; + vaddr_t chunksize; + UVMHIST_FUNC("amap_copy"); UVMHIST_CALLED(maphist); + UVMHIST_LOG(maphist, " (map=%p, entry=%p, waitf=%d)", map, entry, waitf, 0); + + /* + * is there a map to copy? if not, create one from scratch. + */ + + if (entry->aref.ar_amap == NULL) { + + /* + * check to see if we have a large amap that we can + * chunk. we align startva/endva to chunk-sized + * boundaries and then clip to them. + */ + + if (canchunk && atop(entry->end - entry->start) >= + UVM_AMAP_LARGE) { + /* convert slots to bytes */ + chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT; + startva = (startva / chunksize) * chunksize; + endva = roundup(endva, chunksize); + UVMHIST_LOG(maphist, " chunk amap ==> clip 0x%x->0x%x" + "to 0x%x->0x%x", entry->start, entry->end, startva, + endva); + UVM_MAP_CLIP_START(map, entry, startva); + /* watch out for endva wrap-around! */ + if (endva >= startva) + UVM_MAP_CLIP_END(map, entry, endva); + } + + UVMHIST_LOG(maphist, "<- done [creating new amap 0x%x->0x%x]", + entry->start, entry->end, 0, 0); + entry->aref.ar_pageoff = 0; + entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0, + waitf); + if (entry->aref.ar_amap != NULL) + entry->etype &= ~UVM_ET_NEEDSCOPY; + return; + } + + /* + * first check and see if we are the only map entry + * referencing the amap we currently have. if so, then we can + * just take it over rather than copying it. note that we are + * reading am_ref with the amap unlocked... the value can only + * be one if we have the only reference to the amap (via our + * locked map). if we are greater than one we fall through to + * the next case (where we double check the value). + */ + + if (entry->aref.ar_amap->am_ref == 1) { + entry->etype &= ~UVM_ET_NEEDSCOPY; + UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]", + 0, 0, 0, 0); + return; + } + + /* + * looks like we need to copy the map. + */ + + UVMHIST_LOG(maphist," amap=%p, ref=%d, must copy it", + entry->aref.ar_amap, entry->aref.ar_amap->am_ref, 0, 0); + AMAP_B2SLOT(slots, entry->end - entry->start); + amap = amap_alloc1(slots, 0, waitf); + if (amap == NULL) { + UVMHIST_LOG(maphist, " amap_alloc1 failed", 0,0,0,0); + return; + } + srcamap = entry->aref.ar_amap; + amap_lock(srcamap); + + /* + * need to double check reference count now that we've got the + * src amap locked down. the reference count could have + * changed while we were in malloc. if the reference count + * dropped down to one we take over the old map rather than + * copying the amap. + */ + + if (srcamap->am_ref == 1) { /* take it over? 
*/ + entry->etype &= ~UVM_ET_NEEDSCOPY; + amap->am_ref--; /* drop final reference to map */ + amap_free(amap); /* dispose of new (unused) amap */ + amap_unlock(srcamap); + return; + } + + /* + * we must copy it now. + */ + + UVMHIST_LOG(maphist, " copying amap now",0, 0, 0, 0); + for (lcv = 0 ; lcv < slots; lcv++) { + amap->am_anon[lcv] = + srcamap->am_anon[entry->aref.ar_pageoff + lcv]; + if (amap->am_anon[lcv] == NULL) + continue; + simple_lock(&amap->am_anon[lcv]->an_lock); + amap->am_anon[lcv]->an_ref++; + simple_unlock(&amap->am_anon[lcv]->an_lock); + amap->am_bckptr[lcv] = amap->am_nused; + amap->am_slots[amap->am_nused] = lcv; + amap->am_nused++; + } + + /* + * drop our reference to the old amap (srcamap) and unlock. + * we know that the reference count on srcamap is greater than + * one (we checked above), so there is no way we could drop + * the count to zero. [and no need to worry about freeing it] + */ + + srcamap->am_ref--; + if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0) + srcamap->am_flags &= ~AMAP_SHARED; /* clear shared flag */ +#ifdef UVM_AMAP_PPREF + if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) { + amap_pp_adjref(srcamap, entry->aref.ar_pageoff, + entry->end - entry->start, -1); + } +#endif + + amap_unlock(srcamap); + + /* + * install new amap. + */ + + entry->aref.ar_pageoff = 0; + entry->aref.ar_amap = amap; + entry->etype &= ~UVM_ET_NEEDSCOPY; + + /* + * done! + */ + UVMHIST_LOG(maphist, "<- done",0, 0, 0, 0); +} + +/* + * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2) + * + * called during fork(2) when the parent process has a wired map + * entry. in that case we want to avoid write-protecting pages + * in the parent's map (e.g. like what you'd do for a COW page) + * so we resolve the COW here. + * + * => assume parent's entry was wired, thus all pages are resident. + * => assume pages that are loaned out (loan_count) are already mapped + * read-only in all maps, and thus no need for us to worry about them + * => assume both parent and child vm_map's are locked + * => caller passes child's map/entry in to us + * => if we run out of memory we will unlock the amap and sleep _with_ the + * parent and child vm_map's locked(!). we have to do this since + * we are in the middle of a fork(2) and we can't let the parent + * map change until we are done copying all the map entrys. + * => XXXCDC: out of memory should cause fork to fail, but there is + * currently no easy way to do this (needs fix) + * => page queues must be unlocked (we may lock them) + */ + +void +amap_cow_now(map, entry) + struct vm_map *map; + struct vm_map_entry *entry; +{ + struct vm_amap *amap = entry->aref.ar_amap; + int lcv, slot; + struct vm_anon *anon, *nanon; + struct vm_page *pg, *npg; + + /* + * note that if we unlock the amap then we must ReStart the "lcv" for + * loop because some other process could reorder the anon's in the + * am_anon[] array on us while the lock is dropped. + */ +ReStart: + amap_lock(amap); + + for (lcv = 0 ; lcv < amap->am_nused ; lcv++) { + + /* + * get the page + */ + + slot = amap->am_slots[lcv]; + anon = amap->am_anon[slot]; + simple_lock(&anon->an_lock); + pg = anon->u.an_page; + + /* + * page must be resident since parent is wired + */ + + if (pg == NULL) + panic("amap_cow_now: non-resident wired page in anon %p", + anon); + + /* + * if the anon ref count is one and the page is not loaned, + * then we are safe (the child has exclusive access to the + * page). 
if the page is loaned, then it must already be + * mapped read-only. + * + * we only need to get involved when these are not true. + * [note: if loan_count == 0, then the anon must own the page] + */ + + if (anon->an_ref > 1 && pg->loan_count == 0) { + + /* + * if the page is busy then we have to unlock, wait for + * it and then restart. + */ + if (pg->flags & PG_BUSY) { + pg->flags |= PG_WANTED; + amap_unlock(amap); + UVM_UNLOCK_AND_WAIT(pg, &anon->an_lock, FALSE, + "cownow", 0); + goto ReStart; + } + + /* + * ok, time to do a copy-on-write to a new anon + */ + nanon = uvm_analloc(); + if (nanon) + npg = uvm_pagealloc(NULL, 0, nanon); + else + npg = NULL; /* XXX: quiet gcc warning */ + + if (nanon == NULL || npg == NULL) { + /* out of memory */ + /* + * XXXCDC: we should cause fork to fail, but + * we can't ... + */ + if (nanon) + uvm_anfree(nanon); + simple_unlock(&anon->an_lock); + amap_unlock(amap); + uvm_wait("cownowpage"); + goto ReStart; + } + + /* + * got it... now we can copy the data and replace anon + * with our new one... + */ + uvm_pagecopy(pg, npg); /* old -> new */ + anon->an_ref--; /* can't drop to zero */ + amap->am_anon[slot] = nanon; /* replace */ + + /* + * drop PG_BUSY on new page ... since we have had it's + * owner locked the whole time it can't be + * PG_RELEASED | PG_WANTED. + */ + npg->flags &= ~(PG_BUSY|PG_FAKE); + UVM_PAGE_OWN(npg, NULL); + uvm_lock_pageq(); + uvm_pageactivate(npg); + uvm_unlock_pageq(); + } + + simple_unlock(&anon->an_lock); + /* + * done with this anon, next ...! + */ + + } /* end of 'for' loop */ + + return; +} + +/* + * amap_splitref: split a single reference into two seperate references + * + * => called from uvm_map's clip routines + * => origref's map should be locked + * => origref->ar_amap should be unlocked (we will lock) + */ +void +amap_splitref(origref, splitref, offset) + struct vm_aref *origref, *splitref; + vaddr_t offset; +{ + int leftslots; + UVMHIST_FUNC("amap_splitref"); UVMHIST_CALLED(maphist); + + AMAP_B2SLOT(leftslots, offset); + if (leftslots == 0) + panic("amap_splitref: split at zero offset"); + + /* + * lock the amap + */ + amap_lock(origref->ar_amap); + + /* + * now: amap is locked and we have a valid am_mapped array. + */ + + if (origref->ar_amap->am_nslot - origref->ar_pageoff - leftslots <= 0) + panic("amap_splitref: map size check failed"); + +#ifdef UVM_AMAP_PPREF + /* + * establish ppref before we add a duplicate reference to the amap + */ + if (origref->ar_amap->am_ppref == NULL) + amap_pp_establish(origref->ar_amap); +#endif + + splitref->ar_amap = origref->ar_amap; + splitref->ar_amap->am_ref++; /* not a share reference */ + splitref->ar_pageoff = origref->ar_pageoff + leftslots; + + amap_unlock(origref->ar_amap); +} + +#ifdef UVM_AMAP_PPREF + +/* + * amap_pp_establish: add a ppref array to an amap, if possible + * + * => amap locked by caller + */ +void +amap_pp_establish(amap) + struct vm_amap *amap; +{ + + MALLOC(amap->am_ppref, int *, sizeof(int) * amap->am_maxslot, + M_UVMAMAP, M_NOWAIT); + + /* + * if we fail then we just won't use ppref for this amap + */ + if (amap->am_ppref == NULL) { + amap->am_ppref = PPREF_NONE; /* not using it */ + return; + } + + /* + * init ppref + */ + bzero(amap->am_ppref, sizeof(int) * amap->am_maxslot); + pp_setreflen(amap->am_ppref, 0, amap->am_ref, amap->am_nslot); + return; +} + +/* + * amap_pp_adjref: adjust reference count to a part of an amap using the + * per-page reference count array. 
+ * + * => map and amap locked by caller + * => caller must check that ppref != PPREF_NONE before calling + */ +void +amap_pp_adjref(amap, curslot, bytelen, adjval) + struct vm_amap *amap; + int curslot; + vsize_t bytelen; + int adjval; +{ + int slots, stopslot, *ppref, lcv; + int ref, len; + + /* + * get init values + */ + + AMAP_B2SLOT(slots, bytelen); + stopslot = curslot + slots; + ppref = amap->am_ppref; + + /* + * first advance to the correct place in the ppref array, fragment + * if needed. + */ + + for (lcv = 0 ; lcv < curslot ; lcv += len) { + pp_getreflen(ppref, lcv, &ref, &len); + if (lcv + len > curslot) { /* goes past start? */ + pp_setreflen(ppref, lcv, ref, curslot - lcv); + pp_setreflen(ppref, curslot, ref, len - (curslot -lcv)); + len = curslot - lcv; /* new length of entry @ lcv */ + } + } + + /* + * now adjust reference counts in range (make sure we dont overshoot) + */ + + if (lcv != curslot) + panic("amap_pp_adjref: overshot target"); + + for (/* lcv already set */; lcv < stopslot ; lcv += len) { + pp_getreflen(ppref, lcv, &ref, &len); + if (lcv + len > stopslot) { /* goes past end? */ + pp_setreflen(ppref, lcv, ref, stopslot - lcv); + pp_setreflen(ppref, stopslot, ref, + len - (stopslot - lcv)); + len = stopslot - lcv; + } + ref = ref + adjval; /* ADJUST! */ + if (ref < 0) + panic("amap_pp_adjref: negative reference count"); + pp_setreflen(ppref, lcv, ref, len); + if (ref == 0) + amap_wiperange(amap, lcv, len); + } + +} + +/* + * amap_wiperange: wipe out a range of an amap + * [different from amap_wipeout because the amap is kept intact] + * + * => both map and amap must be locked by caller. + */ +void +amap_wiperange(amap, slotoff, slots) + struct vm_amap *amap; + int slotoff, slots; +{ + int byanon, lcv, stop, curslot, ptr; + struct vm_anon *anon; + UVMHIST_FUNC("amap_wiperange"); UVMHIST_CALLED(maphist); + + /* + * we can either traverse the amap by am_anon or by am_slots depending + * on which is cheaper. decide now. + */ + + if (slots < amap->am_nused) { + byanon = TRUE; + lcv = slotoff; + stop = slotoff + slots; + } else { + byanon = FALSE; + lcv = 0; + stop = amap->am_nused; + } + + /* + * ok, now do it! + */ + + for (; lcv < stop; lcv++) { + int refs; + + /* + * verify the anon is ok. + */ + if (byanon) { + if (amap->am_anon[lcv] == NULL) + continue; + curslot = lcv; + } else { + curslot = amap->am_slots[lcv]; + if (curslot < slotoff || curslot >= stop) + continue; + } + anon = amap->am_anon[curslot]; + + /* + * remove it from the amap + */ + amap->am_anon[curslot] = NULL; + ptr = amap->am_bckptr[curslot]; + if (ptr != (amap->am_nused - 1)) { + amap->am_slots[ptr] = + amap->am_slots[amap->am_nused - 1]; + amap->am_bckptr[amap->am_slots[ptr]] = + ptr; /* back ptr. */ + } + amap->am_nused--; + + /* + * drop anon reference count + */ + simple_lock(&anon->an_lock); + refs = --anon->an_ref; + simple_unlock(&anon->an_lock); + if (refs == 0) { + /* + * we just eliminated the last reference to an anon. + * free it. + */ + uvm_anfree(anon); + } + } +} + +#endif diff --git a/sys/uvm/uvm_amap.h b/sys/uvm/uvm_amap.h new file mode 100644 index 00000000000..8783790017f --- /dev/null +++ b/sys/uvm/uvm_amap.h @@ -0,0 +1,282 @@ +/* $NetBSD: uvm_amap.h,v 1.10 1999/01/28 14:46:27 chuck Exp $ */ + +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _UVM_UVM_AMAP_H_ +#define _UVM_UVM_AMAP_H_ + +/* + * uvm_amap.h: general amap interface and amap implementation-specific info + */ + +/* + * an amap structure contains pointers to a set of anons that are + * mapped together in virtual memory (an anon is a single page of + * anonymous virtual memory -- see uvm_anon.h). in uvm we hide the + * details of the implementation of amaps behind a general amap + * interface. this allows us to change the amap implementation + * without having to touch the rest of the code. this file is divided + * into two parts: the definition of the uvm amap interface and the + * amap implementation-specific definitions. + */ + +/* + * part 1: amap interface + */ + +/* + * forward definition of vm_amap structure. only amap + * implementation-specific code should directly access the fields of + * this structure. + */ + +struct vm_amap; + +/* + * handle inline options... we allow amap ops to be inline, but we also + * provide a hook to turn this off. macros can also be used. 
+ */ + +#ifdef UVM_AMAP_INLINE /* defined/undef'd in uvm_amap.c */ +#define AMAP_INLINE static __inline /* inline enabled */ +#else +#define AMAP_INLINE /* inline disabled */ +#endif /* UVM_AMAP_INLINE */ + + +/* + * prototypes for the amap interface + */ + +AMAP_INLINE +vaddr_t amap_add /* add an anon to an amap */ + __P((struct vm_aref *, vaddr_t, + struct vm_anon *, int)); +struct vm_amap *amap_alloc /* allocate a new amap */ + __P((vaddr_t, vaddr_t, int)); +void amap_copy /* clear amap needs-copy flag */ + __P((vm_map_t, vm_map_entry_t, int, + boolean_t, vaddr_t, vaddr_t)); +void amap_cow_now /* resolve all COW faults now */ + __P((vm_map_t, vm_map_entry_t)); +void amap_extend /* make amap larger */ + __P((vm_map_entry_t, vsize_t)); +int amap_flags /* get amap's flags */ + __P((struct vm_amap *)); +void amap_free /* free amap */ + __P((struct vm_amap *)); +void amap_init /* init amap module (at boot time) */ + __P((void)); +void amap_lock /* lock amap */ + __P((struct vm_amap *)); +AMAP_INLINE +struct vm_anon *amap_lookup /* lookup an anon @ offset in amap */ + __P((struct vm_aref *, vaddr_t)); +AMAP_INLINE +void amap_lookups /* lookup multiple anons */ + __P((struct vm_aref *, vaddr_t, + struct vm_anon **, int)); +AMAP_INLINE +void amap_ref /* add a reference to an amap */ + __P((vm_map_entry_t, int)); +int amap_refs /* get number of references of amap */ + __P((struct vm_amap *)); +void amap_share_protect /* protect pages in a shared amap */ + __P((vm_map_entry_t, vm_prot_t)); +void amap_splitref /* split reference to amap into two */ + __P((struct vm_aref *, struct vm_aref *, + vaddr_t)); +AMAP_INLINE +void amap_unadd /* remove an anon from an amap */ + __P((struct vm_amap *, vaddr_t)); +void amap_unlock /* unlock amap */ + __P((struct vm_amap *)); +AMAP_INLINE +void amap_unref /* drop reference to an amap */ + __P((vm_map_entry_t, int)); +void amap_wipeout /* remove all anons from amap */ + __P((struct vm_amap *)); + +/* + * amap flag values + */ + +#define AMAP_SHARED 0x1 /* amap is shared */ +#define AMAP_REFALL 0x2 /* amap_ref: reference entire amap */ + + +/**********************************************************************/ + +/* + * part 2: amap implementation-specific info + */ + +/* + * we currently provide an array-based amap implementation. in this + * implementation we provide the option of tracking split references + * so that we don't lose track of references during partial unmaps + * ... this is enabled with the "UVM_AMAP_PPREF" define. + */ + +#define UVM_AMAP_PPREF /* track partial references */ + +/* + * here is the definition of the vm_amap structure for this implementation. + */ + +struct vm_amap { + simple_lock_data_t am_l; /* simple lock [locks all vm_amap fields] */ + int am_ref; /* reference count */ + int am_flags; /* flags */ + int am_maxslot; /* max # of slots allocated */ + int am_nslot; /* # of slots currently in map ( <= maxslot) */ + int am_nused; /* # of slots currently in use */ + int *am_slots; /* contig array of active slots */ + int *am_bckptr; /* back pointer array to am_slots */ + struct vm_anon **am_anon; /* array of anonymous pages */ +#ifdef UVM_AMAP_PPREF + int *am_ppref; /* per page reference count (if !NULL) */ +#endif +}; + +/* + * note that am_slots, am_bckptr, and am_anon are arrays. this allows + * fast lookup of pages based on their virual address at the expense of + * some extra memory. 
in the future we should be smarter about memory + * usage and fall back to a non-array based implementation on systems + * that are short of memory (XXXCDC). + * + * the entries in the array are called slots... for example an amap that + * covers four pages of virtual memory is said to have four slots. here + * is an example of the array usage for a four slot amap. note that only + * slots one and three have anons assigned to them. "D/C" means that we + * "don't care" about the value. + * + * 0 1 2 3 + * am_anon: NULL, anon0, NULL, anon1 (actual pointers to anons) + * am_bckptr: D/C, 1, D/C, 0 (points to am_slots entry) + * + * am_slots: 3, 1, D/C, D/C (says slots 3 and 1 are in use) + * + * note that am_bckptr is D/C if the slot in am_anon is set to NULL. + * to find the entry in am_slots for an anon, look at am_bckptr[slot], + * thus the entry for slot 3 in am_slots[] is at am_slots[am_bckptr[3]]. + * in general, if am_anon[X] is non-NULL, then the following must be + * true: am_slots[am_bckptr[X]] == X + * + * note that am_slots is always contig-packed. + */ + +/* + * defines for handling of large sparce amaps: + * + * one of the problems of array-based amaps is that if you allocate a + * large sparcely-used area of virtual memory you end up allocating + * large arrays that, for the most part, don't get used. this is a + * problem for BSD in that the kernel likes to make these types of + * allocations to "reserve" memory for possible future use. + * + * for example, the kernel allocates (reserves) a large chunk of user + * VM for possible stack growth. most of the time only a page or two + * of this VM is actually used. since the stack is anonymous memory + * it makes sense for it to live in an amap, but if we allocated an + * amap for the entire stack range we could end up wasting a large + * amount of malloc'd KVM. + * + * for example, on the i386 at boot time we allocate two amaps for the stack + * of /sbin/init: + * 1. a 7680 slot amap at protection 0 (reserve space for stack) + * 2. a 512 slot amap at protection 7 (top of stack) + * + * most of the array allocated for the amaps for this is never used. + * the amap interface provides a way for us to avoid this problem by + * allowing amap_copy() to break larger amaps up into smaller sized + * chunks (controlled by the "canchunk" option). we use this feature + * to reduce our memory usage with the BSD stack management. if we + * are asked to create an amap with more than UVM_AMAP_LARGE slots in it, + * we attempt to break it up into a UVM_AMAP_CHUNK sized amap if the + * "canchunk" flag is set. + * + * so, in the i386 example, the 7680 slot area is never referenced so + * nothing gets allocated (amap_copy is never called because the protection + * is zero). the 512 slot area for the top of the stack is referenced. + * the chunking code breaks it up into 16 slot chunks (hopefully a single + * 16 slot chunk is enough to handle the whole stack). 
+ */ + +#define UVM_AMAP_LARGE 256 /* # of slots in "large" amap */ +#define UVM_AMAP_CHUNK 16 /* # of slots to chunk large amaps in */ + + +/* + * macros + */ + +/* AMAP_B2SLOT: convert byte offset to slot */ +#ifdef DIAGNOSTIC +#define AMAP_B2SLOT(S,B) { \ + if ((B) & (PAGE_SIZE - 1)) \ + panic("AMAP_B2SLOT: invalid byte count"); \ + (S) = (B) >> PAGE_SHIFT; \ +} +#else +#define AMAP_B2SLOT(S,B) (S) = (B) >> PAGE_SHIFT +#endif + +/* + * lock/unlock/refs/flags macros + */ + +#define amap_flags(AMAP) ((AMAP)->am_flags) +#define amap_lock(AMAP) simple_lock(&(AMAP)->am_l) +#define amap_refs(AMAP) ((AMAP)->am_ref) +#define amap_unlock(AMAP) simple_unlock(&(AMAP)->am_l) + +/* + * if we enable PPREF, then we have a couple of extra functions that + * we need to prototype here... + */ + +#ifdef UVM_AMAP_PPREF + +#define PPREF_NONE ((int *) -1) /* not using ppref */ + +void amap_pp_adjref /* adjust references */ + __P((struct vm_amap *, int, vsize_t, int)); +void amap_pp_establish /* establish ppref */ + __P((struct vm_amap *)); +void amap_wiperange /* wipe part of an amap */ + __P((struct vm_amap *, int, int)); +#endif /* UVM_AMAP_PPREF */ + +#endif /* _UVM_UVM_AMAP_H_ */ diff --git a/sys/uvm/uvm_amap_i.h b/sys/uvm/uvm_amap_i.h new file mode 100644 index 00000000000..d5bbe11c054 --- /dev/null +++ b/sys/uvm/uvm_amap_i.h @@ -0,0 +1,291 @@ +/* $NetBSD: uvm_amap_i.h,v 1.11 1999/01/28 14:46:27 chuck Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * from: Id: uvm_amap_i.h,v 1.1.2.4 1998/01/05 18:12:57 chuck Exp + */ + +#ifndef _UVM_UVM_AMAP_I_H_ +#define _UVM_UVM_AMAP_I_H_ + +/* + * uvm_amap_i.h + */ + +/* + * if inlines are enabled always pull in these functions, otherwise + * pull them in only once (when we are compiling uvm_amap.c). + */ + +#if defined(UVM_AMAP_INLINE) || defined(UVM_AMAP_C) + +/* + * amap_lookup: look up a page in an amap + * + * => amap should be locked by caller. + */ +AMAP_INLINE struct vm_anon * +amap_lookup(aref, offset) + struct vm_aref *aref; + vaddr_t offset; +{ + int slot; + struct vm_amap *amap = aref->ar_amap; + UVMHIST_FUNC("amap_lookup"); UVMHIST_CALLED(maphist); + + AMAP_B2SLOT(slot, offset); + slot += aref->ar_pageoff; + + if (slot >= amap->am_nslot) + panic("amap_lookup: offset out of range"); + + UVMHIST_LOG(maphist, "<- done (amap=0x%x, offset=0x%x, result=0x%x)", + amap, offset, amap->am_anon[slot], 0); + return(amap->am_anon[slot]); +} + +/* + * amap_lookups: look up a range of pages in an amap + * + * => amap should be locked by caller. + * => XXXCDC: this interface is biased toward array-based amaps. fix. + */ +AMAP_INLINE void +amap_lookups(aref, offset, anons, npages) + struct vm_aref *aref; + vaddr_t offset; + struct vm_anon **anons; + int npages; +{ + int slot; + struct vm_amap *amap = aref->ar_amap; + UVMHIST_FUNC("amap_lookups"); UVMHIST_CALLED(maphist); + + AMAP_B2SLOT(slot, offset); + slot += aref->ar_pageoff; + + UVMHIST_LOG(maphist, " slot=%d, npages=%d, nslot=%d", slot, npages, + amap->am_nslot, 0); + + if ((slot + (npages - 1)) >= amap->am_nslot) + panic("amap_lookups: offset out of range"); + + bcopy(&amap->am_anon[slot], anons, npages * sizeof(struct vm_anon *)); + + UVMHIST_LOG(maphist, "<- done", 0, 0, 0, 0); + return; +} + +/* + * amap_add: add (or replace) a page to an amap + * + * => caller must lock amap. + * => if (replace) caller must lock anon because we might have to call + * pmap_page_protect on the anon's page. + * => returns an "offset" which is meaningful to amap_unadd(). + */ +AMAP_INLINE vaddr_t +amap_add(aref, offset, anon, replace) + struct vm_aref *aref; + vaddr_t offset; + struct vm_anon *anon; + int replace; +{ + int slot; + struct vm_amap *amap = aref->ar_amap; + UVMHIST_FUNC("amap_add"); UVMHIST_CALLED(maphist); + + AMAP_B2SLOT(slot, offset); + slot += aref->ar_pageoff; + + if (slot >= amap->am_nslot) + panic("amap_add: offset out of range"); + + if (replace) { + + if (amap->am_anon[slot] == NULL) + panic("amap_add: replacing null anon"); + if (amap->am_anon[slot]->u.an_page != NULL && + (amap->am_flags & AMAP_SHARED) != 0) { + pmap_page_protect( + PMAP_PGARG(amap->am_anon[slot]->u.an_page), + VM_PROT_NONE); + /* + * XXX: suppose page is supposed to be wired somewhere? + */ + } + } else { /* !replace */ + if (amap->am_anon[slot] != NULL) + panic("amap_add: slot in use"); + + amap->am_bckptr[slot] = amap->am_nused; + amap->am_slots[amap->am_nused] = slot; + amap->am_nused++; + } + amap->am_anon[slot] = anon; + UVMHIST_LOG(maphist, + "<- done (amap=0x%x, offset=0x%x, anon=0x%x, rep=%d)", + amap, offset, anon, replace); + + return(slot); +} + +/* + * amap_unadd: remove a page from an amap, given we know the slot #. 
+ * + * => caller must lock amap + */ +AMAP_INLINE void +amap_unadd(amap, slot) + struct vm_amap *amap; + vaddr_t slot; +{ + int ptr; + UVMHIST_FUNC("amap_unadd"); UVMHIST_CALLED(maphist); + + if (slot >= amap->am_nslot) + panic("amap_add: offset out of range"); + + if (amap->am_anon[slot] == NULL) + panic("amap_unadd: nothing there"); + + amap->am_anon[slot] = NULL; + ptr = amap->am_bckptr[slot]; + + if (ptr != (amap->am_nused - 1)) { /* swap to keep slots contig? */ + amap->am_slots[ptr] = amap->am_slots[amap->am_nused - 1]; + amap->am_bckptr[amap->am_slots[ptr]] = ptr; /* back link */ + } + amap->am_nused--; + UVMHIST_LOG(maphist, "<- done (amap=0x%x, slot=0x%x)", amap, slot,0, 0); +} + +/* + * amap_ref: gain a reference to an amap + * + * => amap must not be locked (we will lock) + * => called at fork time to gain the child's reference + */ +AMAP_INLINE void +amap_ref(entry, flags) + vm_map_entry_t entry; + int flags; +{ + struct vm_amap *amap = entry->aref.ar_amap; + UVMHIST_FUNC("amap_ref"); UVMHIST_CALLED(maphist); + + amap_lock(amap); + amap->am_ref++; + if (flags & AMAP_SHARED) + amap->am_flags |= AMAP_SHARED; +#ifdef UVM_AMAP_PPREF + if (amap->am_ppref == NULL && (flags & AMAP_REFALL) == 0 && + (entry->start - entry->end) >> PAGE_SHIFT != amap->am_nslot) + amap_pp_establish(amap); + if (amap->am_ppref && amap->am_ppref != PPREF_NONE) { + if (flags & AMAP_REFALL) + amap_pp_adjref(amap, 0, amap->am_nslot << PAGE_SHIFT, 1); + else + amap_pp_adjref(amap, entry->aref.ar_pageoff, + entry->end - entry->start, 1); + } +#endif + amap_unlock(amap); + UVMHIST_LOG(maphist,"<- done! amap=0x%x", amap, 0, 0, 0); +} + +/* + * amap_unref: remove a reference to an amap + * + * => caller must remove all pmap-level references to this amap before + * dropping the reference + * => called from uvm_unmap_detach [only] ... note that entry is no + * longer part of a map and thus has no need for locking + * => amap must be unlocked (we will lock it). + */ +AMAP_INLINE void +amap_unref(entry, all) + vm_map_entry_t entry; + int all; +{ + struct vm_amap *amap = entry->aref.ar_amap; + UVMHIST_FUNC("amap_unref"); UVMHIST_CALLED(maphist); + + /* + * lock it + */ + amap_lock(amap); + + UVMHIST_LOG(maphist,"(entry=0x%x) amap=0x%x refs=%d, nused=%d", + entry, amap, amap->am_ref, amap->am_nused); + + /* + * if we are the last reference, free the amap and return. 
+ */ + + if (amap->am_ref == 1) { + amap_wipeout(amap); /* drops final ref and frees */ + UVMHIST_LOG(maphist,"<- done (was last ref)!", 0, 0, 0, 0); + return; /* no need to unlock */ + } + + /* + * otherwise just drop the reference count(s) + */ + + amap->am_ref--; + if (amap->am_ref == 1 && (amap->am_flags & AMAP_SHARED) != 0) + amap->am_flags &= ~AMAP_SHARED; /* clear shared flag */ +#ifdef UVM_AMAP_PPREF + if (amap->am_ppref == NULL && all == 0 && + (entry->start - entry->end) >> PAGE_SHIFT != amap->am_nslot) + amap_pp_establish(amap); + if (amap->am_ppref && amap->am_ppref != PPREF_NONE) { + if (all) + amap_pp_adjref(amap, 0, amap->am_nslot << PAGE_SHIFT, -1); + else + amap_pp_adjref(amap, entry->aref.ar_pageoff, + entry->end - entry->start, -1); + } +#endif + amap_unlock(amap); + + UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0); +} + +#endif /* defined(UVM_AMAP_INLINE) || defined(UVM_AMAP_C) */ + +#endif /* _UVM_UVM_AMAP_I_H_ */ diff --git a/sys/uvm/uvm_anon.c b/sys/uvm/uvm_anon.c new file mode 100644 index 00000000000..214e12df701 --- /dev/null +++ b/sys/uvm/uvm_anon.c @@ -0,0 +1,345 @@ +/* $NetBSD: uvm_anon.c,v 1.1 1999/01/24 23:53:15 chuck Exp $ */ + +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * uvm_anon.c: uvm anon ops + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/malloc.h> +#include <sys/pool.h> + +#include <vm/vm.h> +#include <vm/vm_page.h> +#include <vm/vm_kern.h> + +#include <uvm/uvm.h> +#include <uvm/uvm_swap.h> + +/* + * allocate anons + */ +void +uvm_anon_init() +{ + struct vm_anon *anon; + int nanon = uvmexp.free - (uvmexp.free / 16); /* XXXCDC ??? */ + int lcv; + + /* + * Allocate the initial anons. 
+ */ + anon = (struct vm_anon *)uvm_km_alloc(kernel_map, + sizeof(*anon) * nanon); + if (anon == NULL) { + printf("uvm_anon_init: can not allocate %d anons\n", nanon); + panic("uvm_anon_init"); + } + + bzero(anon, sizeof(*anon) * nanon); + uvm.afree = NULL; + uvmexp.nanon = uvmexp.nfreeanon = nanon; + for (lcv = 0 ; lcv < nanon ; lcv++) { + anon[lcv].u.an_nxt = uvm.afree; + uvm.afree = &anon[lcv]; + } + simple_lock_init(&uvm.afreelock); +} + +/* + * add some more anons to the free pool. called when we add + * more swap space. + */ +void +uvm_anon_add(pages) + int pages; +{ + struct vm_anon *anon; + int lcv; + + anon = (struct vm_anon *)uvm_km_alloc(kernel_map, + sizeof(*anon) * pages); + + /* XXX Should wait for VM to free up. */ + if (anon == NULL) { + printf("uvm_anon_add: can not allocate %d anons\n", pages); + panic("uvm_anon_add"); + } + + simple_lock(&uvm.afreelock); + bzero(anon, sizeof(*anon) * pages); + uvmexp.nanon += pages; + uvmexp.nfreeanon += pages; + for (lcv = 0; lcv < pages; lcv++) { + simple_lock_init(&anon->an_lock); + anon[lcv].u.an_nxt = uvm.afree; + uvm.afree = &anon[lcv]; + } + simple_unlock(&uvm.afreelock); +} + +/* + * allocate an anon + */ +struct vm_anon * +uvm_analloc() +{ + struct vm_anon *a; + + simple_lock(&uvm.afreelock); + a = uvm.afree; + if (a) { + uvm.afree = a->u.an_nxt; + uvmexp.nfreeanon--; + a->an_ref = 1; + a->an_swslot = 0; + a->u.an_page = NULL; /* so we can free quickly */ + } + simple_unlock(&uvm.afreelock); + return(a); +} + +/* + * uvm_anfree: free a single anon structure + * + * => caller must remove anon from its amap before calling (if it was in + * an amap). + * => anon must be unlocked and have a zero reference count. + * => we may lock the pageq's. + */ +void +uvm_anfree(anon) + struct vm_anon *anon; +{ + struct vm_page *pg; + UVMHIST_FUNC("uvm_anfree"); UVMHIST_CALLED(maphist); + UVMHIST_LOG(maphist,"(anon=0x%x)", anon, 0,0,0); + + /* + * get page + */ + + pg = anon->u.an_page; + + /* + * if there is a resident page and it is loaned, then anon may not + * own it. call out to uvm_anon_lockpage() to ensure the real owner + * of the page has been identified and locked. + */ + + if (pg && pg->loan_count) + pg = uvm_anon_lockloanpg(anon); + + /* + * if we have a resident page, we must dispose of it before freeing + * the anon. + */ + + if (pg) { + + /* + * if the page is owned by a uobject (now locked), then we must + * kill the loan on the page rather than free it. + */ + + if (pg->uobject) { + + /* kill loan */ + uvm_lock_pageq(); +#ifdef DIAGNOSTIC + if (pg->loan_count < 1) + panic("uvm_anfree: obj owned page " + "with no loan count"); +#endif + pg->loan_count--; + pg->uanon = NULL; + uvm_unlock_pageq(); + simple_unlock(&pg->uobject->vmobjlock); + + } else { + + /* + * page has no uobject, so we must be the owner of it. + * + * if page is busy then we just mark it as released + * (who ever has it busy must check for this when they + * wake up). if the page is not busy then we can + * free it now. 
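/*
 * The anon allocator in this file (uvm_analloc/uvm_anfree) is a LIFO
 * free list threaded through a union: a free anon reuses the space of
 * its page pointer as the "next free" link.  A standalone sketch of
 * that layout and the push/pop operations, with the kernel's afreelock
 * locking omitted for brevity; the names here are illustrative:
 */
#include <stddef.h>
#include <stdio.h>

struct anon {
    int ref;
    union {
        struct anon *nxt;     /* valid while on the free list */
        void        *page;    /* valid while allocated */
    } u;
};

static struct anon pool[4];
static struct anon *afree;    /* head of the free list */

static void
anon_init(void)
{
    size_t i;

    for (i = 0; i < sizeof(pool) / sizeof(pool[0]); i++) {
        pool[i].u.nxt = afree;    /* push onto the free list */
        afree = &pool[i];
    }
}

static struct anon *
anon_alloc(void)
{
    struct anon *a = afree;

    if (a != NULL) {
        afree = a->u.nxt;         /* pop */
        a->ref = 1;
        a->u.page = NULL;         /* link field reused as the page slot */
    }
    return a;
}

static void
anon_free(struct anon *a)
{
    a->u.nxt = afree;             /* push back */
    afree = a;
}

int
main(void)
{
    struct anon *a;

    anon_init();
    a = anon_alloc();
    printf("allocated anon with ref %d\n", a->ref);
    anon_free(a);
    return 0;
}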
+ */ + + if ((pg->flags & PG_BUSY) != 0) { + /* tell them to dump it when done */ + pg->flags |= PG_RELEASED; + simple_unlock(&anon->an_lock); + UVMHIST_LOG(maphist, + " anon 0x%x, page 0x%x: BUSY (released!)", + anon, pg, 0, 0); + return; + } + + pmap_page_protect(PMAP_PGARG(pg), VM_PROT_NONE); + uvm_lock_pageq(); /* lock out pagedaemon */ + uvm_pagefree(pg); /* bye bye */ + uvm_unlock_pageq(); /* free the daemon */ + + UVMHIST_LOG(maphist," anon 0x%x, page 0x%x: freed now!", + anon, pg, 0, 0); + } + } + + /* + * are we using any backing store resources? if so, free them. + */ + if (anon->an_swslot) { + /* + * on backing store: no I/O in progress. sole amap reference + * is ours and we've got it locked down. thus we can free, + * and be done. + */ + UVMHIST_LOG(maphist," freeing anon 0x%x, paged to swslot 0x%x", + anon, anon->an_swslot, 0, 0); + uvm_swap_free(anon->an_swslot, 1); + anon->an_swslot = 0; + } + + /* + * now that we've stripped the data areas from the anon, free the anon + * itself! + */ + simple_lock(&uvm.afreelock); + anon->u.an_nxt = uvm.afree; + uvm.afree = anon; + uvmexp.nfreeanon++; + simple_unlock(&uvm.afreelock); + UVMHIST_LOG(maphist,"<- done!",0,0,0,0); +} + +/* + * uvm_anon_lockloanpg: given a locked anon, lock its resident page + * + * => anon is locked by caller + * => on return: anon is locked + * if there is a resident page: + * if it has a uobject, it is locked by us + * if it is ownerless, we take over as owner + * we return the resident page (it can change during + * this function) + * => note that the only time an anon has an ownerless resident page + * is if the page was loaned from a uvm_object and the uvm_object + * disowned it + * => this only needs to be called when you want to do an operation + * on an anon's resident page and that page has a non-zero loan + * count. + */ +struct vm_page * +uvm_anon_lockloanpg(anon) + struct vm_anon *anon; +{ + struct vm_page *pg; + boolean_t locked = FALSE; + + /* + * loop while we have a resident page that has a non-zero loan count. + * if we successfully get our lock, we will "break" the loop. + * note that the test for pg->loan_count is not protected -- this + * may produce false positive results. note that a false positive + * result may cause us to do more work than we need to, but it will + * not produce an incorrect result. + */ + + while (((pg = anon->u.an_page) != NULL) && pg->loan_count != 0) { + + /* + * quickly check to see if the page has an object before + * bothering to lock the page queues. this may also produce + * a false positive result, but that's ok because we do a real + * check after that. + * + * XXX: quick check -- worth it? need volatile? + */ + + if (pg->uobject) { + + uvm_lock_pageq(); + if (pg->uobject) { /* the "real" check */ + locked = + simple_lock_try(&pg->uobject->vmobjlock); + } else { + /* object disowned before we got PQ lock */ + locked = TRUE; + } + uvm_unlock_pageq(); + + /* + * if we didn't get a lock (try lock failed), then we + * toggle our anon lock and try again + */ + + if (!locked) { + simple_unlock(&anon->an_lock); + /* + * someone locking the object has a chance to + * lock us right now + */ + simple_lock(&anon->an_lock); + continue; /* start over */ + } + } + + /* + * if page is un-owned [i.e. the object dropped its ownership], + * then we can take over as owner! + */ + + if (pg->uobject == NULL && (pg->pqflags & PQ_ANON) == 0) { + uvm_lock_pageq(); + pg->pqflags |= PQ_ANON; /* take ownership... */ + pg->loan_count--; /* ... 
and drop our loan */ + uvm_unlock_pageq(); + } + + /* + * we did it! break the loop + */ + break; + } + + /* + * done! + */ + + return(pg); +} diff --git a/sys/uvm/uvm_anon.h b/sys/uvm/uvm_anon.h new file mode 100644 index 00000000000..f52f6f646f4 --- /dev/null +++ b/sys/uvm/uvm_anon.h @@ -0,0 +1,105 @@ +/* $NetBSD: uvm_anon.h,v 1.9 1999/01/24 23:53:15 chuck Exp $ */ + +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _UVM_UVM_ANON_H_ +#define _UVM_UVM_ANON_H_ + +/* + * uvm_anon.h + */ + +/* + * anonymous memory management + * + * anonymous virtual memory is short term virtual memory that goes away + * when the processes referencing it go away. an anonymous page of + * virtual memory is described by the following data structure: + */ + +struct vm_anon { + int an_ref; /* reference count [an_lock] */ + simple_lock_data_t an_lock; /* lock for an_ref */ + union { + struct vm_anon *an_nxt; /* if on free list [afreelock] */ + struct vm_page *an_page;/* if in RAM [an_lock] */ + } u; + int an_swslot; /* drum swap slot # (if != 0) + [an_lock. also, it is ok to read + an_swslot if we hold an_page PG_BUSY] */ +}; + +/* + * a pool of vm_anon data structures is allocated and put on a global + * free list at boot time. vm_anon's on the free list use "an_nxt" as + * a pointer to the next item on the free list. for active vm_anon's + * the data can be in one of the following state: [1] in a vm_page + * with no backing store allocated yet, [2] in a vm_page with backing + * store allocated, or [3] paged out to backing store (no vm_page). + * + * for pageout in case [2]: if the page has been modified then we must + * flush it out to backing store, otherwise we can just dump the + * vm_page. + */ + +/* + * anons are grouped together in anonymous memory maps, or amaps. + * amaps are defined in uvm_amap.h. 
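/*
 * uvm_anon_lockloanpg in uvm_anon.c above avoids deadlock by never
 * blocking on the object lock while the anon lock is held: it try-locks
 * the object and, on failure, briefly drops and re-takes the anon lock
 * so that whoever holds the object lock gets a chance to lock the anon
 * in the usual order.  A pthread-based sketch of that back-off loop;
 * the two mutexes are illustrative stand-ins for the kernel locks:
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t anon_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t obj_lock  = PTHREAD_MUTEX_INITIALIZER;

/* call with anon_lock held; returns with anon_lock and obj_lock held */
static void
lock_obj_with_backoff(void)
{
    for (;;) {
        if (pthread_mutex_trylock(&obj_lock) == 0)
            return;                        /* got it, done */
        /* contended: back off so the other lock holder can progress */
        pthread_mutex_unlock(&anon_lock);
        pthread_mutex_lock(&anon_lock);
    }
}

int
main(void)
{
    pthread_mutex_lock(&anon_lock);
    lock_obj_with_backoff();               /* uncontended here, succeeds */
    printf("holding both locks\n");
    pthread_mutex_unlock(&obj_lock);
    pthread_mutex_unlock(&anon_lock);
    return 0;
}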
+ */ + +/* + * processes reference anonymous virtual memory maps with an anonymous + * reference structure: + */ + +struct vm_aref { + int ar_pageoff; /* page offset into amap we start */ + struct vm_amap *ar_amap; /* pointer to amap */ +}; + +/* + * the offset field indicates which part of the amap we are referencing. + * locked by vm_map lock. + */ + +/* + * prototypes + */ + +struct vm_anon *uvm_analloc __P((void)); +void uvm_anfree __P((struct vm_anon *)); +void uvm_anon_init __P((void)); +void uvm_anon_add __P((int)); +struct vm_page *uvm_anon_lockloanpg __P((struct vm_anon *)); + +#endif /* _UVM_UVM_ANON_H_ */ diff --git a/sys/uvm/uvm_aobj.c b/sys/uvm/uvm_aobj.c new file mode 100644 index 00000000000..8e0d3fc22ef --- /dev/null +++ b/sys/uvm/uvm_aobj.c @@ -0,0 +1,1090 @@ +/* $NetBSD: uvm_aobj.c,v 1.15 1998/10/18 23:49:59 chs Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and + * Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * from: Id: uvm_aobj.c,v 1.1.2.5 1998/02/06 05:14:38 chs Exp + */ +/* + * uvm_aobj.c: anonymous memory uvm_object pager + * + * author: Chuck Silvers <chuq@chuq.com> + * started: Jan-1998 + * + * - design mostly from Chuck Cranor + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/malloc.h> +#include <sys/pool.h> + +#include <vm/vm.h> +#include <vm/vm_page.h> +#include <vm/vm_kern.h> + +#include <uvm/uvm.h> + +/* + * an aobj manages anonymous-memory backed uvm_objects. in addition + * to keeping the list of resident pages, it also keeps a list of + * allocated swap blocks. depending on the size of the aobj this list + * of allocated swap blocks is either stored in an array (small objects) + * or in a hash table (large objects). 
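/*
 * The hash-table case works in clusters of pages: the low bits of a
 * page index select a slot within a cluster, and the high bits form the
 * cluster "tag", which is also what gets hashed to pick a bucket.  A
 * standalone sketch of the arithmetic behind the UAO_SWHASH_* macros
 * defined below, using the same cluster shift of 4; the page index and
 * bucket mask values here are only examples:
 */
#include <stdio.h>

#define CLUSTER_SHIFT 4
#define CLUSTER_SIZE  (1 << CLUSTER_SHIFT)    /* 16 pages per cluster */

int
main(void)
{
    unsigned long pageidx  = 0x123;           /* example page index */
    unsigned long hashmask = 0x3f;            /* example: 64 buckets */

    unsigned long tag    = pageidx >> CLUSTER_SHIFT;         /* cluster tag */
    unsigned long slot   = pageidx & (CLUSTER_SIZE - 1);     /* index in cluster */
    unsigned long bucket = (pageidx >> CLUSTER_SHIFT) & hashmask;

    printf("pageidx 0x%lx: tag 0x%lx, cluster slot %lu, bucket %lu\n",
        pageidx, tag, slot, bucket);
    return 0;
}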
+ */ + +/* + * local structures + */ + +/* + * for hash tables, we break the address space of the aobj into blocks + * of UAO_SWHASH_CLUSTER_SIZE pages. we require the cluster size to + * be a power of two. + */ + +#define UAO_SWHASH_CLUSTER_SHIFT 4 +#define UAO_SWHASH_CLUSTER_SIZE (1 << UAO_SWHASH_CLUSTER_SHIFT) + +/* get the "tag" for this page index */ +#define UAO_SWHASH_ELT_TAG(PAGEIDX) \ + ((PAGEIDX) >> UAO_SWHASH_CLUSTER_SHIFT) + +/* given an ELT and a page index, find the swap slot */ +#define UAO_SWHASH_ELT_PAGESLOT(ELT, PAGEIDX) \ + ((ELT)->slots[(PAGEIDX) & (UAO_SWHASH_CLUSTER_SIZE - 1)]) + +/* given an ELT, return its pageidx base */ +#define UAO_SWHASH_ELT_PAGEIDX_BASE(ELT) \ + ((ELT)->tag << UAO_SWHASH_CLUSTER_SHIFT) + +/* + * the swhash hash function + */ +#define UAO_SWHASH_HASH(AOBJ, PAGEIDX) \ + (&(AOBJ)->u_swhash[(((PAGEIDX) >> UAO_SWHASH_CLUSTER_SHIFT) \ + & (AOBJ)->u_swhashmask)]) + +/* + * the swhash threshhold determines if we will use an array or a + * hash table to store the list of allocated swap blocks. + */ + +#define UAO_SWHASH_THRESHOLD (UAO_SWHASH_CLUSTER_SIZE * 4) +#define UAO_USES_SWHASH(AOBJ) \ + ((AOBJ)->u_pages > UAO_SWHASH_THRESHOLD) /* use hash? */ + +/* + * the number of buckets in a swhash, with an upper bound + */ +#define UAO_SWHASH_MAXBUCKETS 256 +#define UAO_SWHASH_BUCKETS(AOBJ) \ + (min((AOBJ)->u_pages >> UAO_SWHASH_CLUSTER_SHIFT, \ + UAO_SWHASH_MAXBUCKETS)) + + +/* + * uao_swhash_elt: when a hash table is being used, this structure defines + * the format of an entry in the bucket list. + */ + +struct uao_swhash_elt { + LIST_ENTRY(uao_swhash_elt) list; /* the hash list */ + vaddr_t tag; /* our 'tag' */ + int count; /* our number of active slots */ + int slots[UAO_SWHASH_CLUSTER_SIZE]; /* the slots */ +}; + +/* + * uao_swhash: the swap hash table structure + */ + +LIST_HEAD(uao_swhash, uao_swhash_elt); + +/* + * uao_swhash_elt_pool: pool of uao_swhash_elt structures + */ + +struct pool uao_swhash_elt_pool; + +/* + * uvm_aobj: the actual anon-backed uvm_object + * + * => the uvm_object is at the top of the structure, this allows + * (struct uvm_device *) == (struct uvm_object *) + * => only one of u_swslots and u_swhash is used in any given aobj + */ + +struct uvm_aobj { + struct uvm_object u_obj; /* has: lock, pgops, memq, #pages, #refs */ + int u_pages; /* number of pages in entire object */ + int u_flags; /* the flags (see uvm_aobj.h) */ + int *u_swslots; /* array of offset->swapslot mappings */ + /* + * hashtable of offset->swapslot mappings + * (u_swhash is an array of bucket heads) + */ + struct uao_swhash *u_swhash; + u_long u_swhashmask; /* mask for hashtable */ + LIST_ENTRY(uvm_aobj) u_list; /* global list of aobjs */ +}; + +/* + * uvm_aobj_pool: pool of uvm_aobj structures + */ + +struct pool uvm_aobj_pool; + +/* + * local functions + */ + +static void uao_init __P((void)); +static struct uao_swhash_elt *uao_find_swhash_elt __P((struct uvm_aobj *, + int, boolean_t)); +static int uao_find_swslot __P((struct uvm_aobj *, + int)); +static boolean_t uao_flush __P((struct uvm_object *, + vaddr_t, vaddr_t, + int)); +static void uao_free __P((struct uvm_aobj *)); +static int uao_get __P((struct uvm_object *, vaddr_t, + vm_page_t *, int *, int, + vm_prot_t, int, int)); +static boolean_t uao_releasepg __P((struct vm_page *, + struct vm_page **)); + + + +/* + * aobj_pager + * + * note that some functions (e.g. 
put) are handled elsewhere + */ + +struct uvm_pagerops aobj_pager = { + uao_init, /* init */ + NULL, /* attach */ + uao_reference, /* reference */ + uao_detach, /* detach */ + NULL, /* fault */ + uao_flush, /* flush */ + uao_get, /* get */ + NULL, /* asyncget */ + NULL, /* put (done by pagedaemon) */ + NULL, /* cluster */ + NULL, /* mk_pcluster */ + uvm_shareprot, /* shareprot */ + NULL, /* aiodone */ + uao_releasepg /* releasepg */ +}; + +/* + * uao_list: global list of active aobjs, locked by uao_list_lock + */ + +static LIST_HEAD(aobjlist, uvm_aobj) uao_list; +static simple_lock_data_t uao_list_lock; + + +/* + * functions + */ + +/* + * hash table/array related functions + */ + +/* + * uao_find_swhash_elt: find (or create) a hash table entry for a page + * offset. + * + * => the object should be locked by the caller + */ + +static struct uao_swhash_elt * +uao_find_swhash_elt(aobj, pageidx, create) + struct uvm_aobj *aobj; + int pageidx; + boolean_t create; +{ + struct uao_swhash *swhash; + struct uao_swhash_elt *elt; + int page_tag; + + swhash = UAO_SWHASH_HASH(aobj, pageidx); /* first hash to get bucket */ + page_tag = UAO_SWHASH_ELT_TAG(pageidx); /* tag to search for */ + + /* + * now search the bucket for the requested tag + */ + for (elt = swhash->lh_first; elt != NULL; elt = elt->list.le_next) { + if (elt->tag == page_tag) + return(elt); + } + + /* fail now if we are not allowed to create a new entry in the bucket */ + if (!create) + return NULL; + + + /* + * allocate a new entry for the bucket and init/insert it in + */ + elt = pool_get(&uao_swhash_elt_pool, PR_WAITOK); + LIST_INSERT_HEAD(swhash, elt, list); + elt->tag = page_tag; + elt->count = 0; + bzero(elt->slots, sizeof(elt->slots)); + + return(elt); +} + +/* + * uao_find_swslot: find the swap slot number for an aobj/pageidx + * + * => object must be locked by caller + */ +__inline static int +uao_find_swslot(aobj, pageidx) + struct uvm_aobj *aobj; + int pageidx; +{ + + /* + * if noswap flag is set, then we never return a slot + */ + + if (aobj->u_flags & UAO_FLAG_NOSWAP) + return(0); + + /* + * if hashing, look in hash table. + */ + + if (UAO_USES_SWHASH(aobj)) { + struct uao_swhash_elt *elt = + uao_find_swhash_elt(aobj, pageidx, FALSE); + + if (elt) + return(UAO_SWHASH_ELT_PAGESLOT(elt, pageidx)); + else + return(NULL); + } + + /* + * otherwise, look in the array + */ + return(aobj->u_swslots[pageidx]); +} + +/* + * uao_set_swslot: set the swap slot for a page in an aobj. + * + * => setting a slot to zero frees the slot + * => object must be locked by caller + */ +int +uao_set_swslot(uobj, pageidx, slot) + struct uvm_object *uobj; + int pageidx, slot; +{ + struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; + int oldslot; + UVMHIST_FUNC("uao_set_swslot"); UVMHIST_CALLED(pdhist); + UVMHIST_LOG(pdhist, "aobj %p pageidx %d slot %d", + aobj, pageidx, slot, 0); + + /* + * if noswap flag is set, then we can't set a slot + */ + + if (aobj->u_flags & UAO_FLAG_NOSWAP) { + + if (slot == 0) + return(0); /* a clear is ok */ + + /* but a set is not */ + printf("uao_set_swslot: uobj = %p\n", uobj); + panic("uao_set_swslot: attempt to set a slot on a NOSWAP object"); + } + + /* + * are we using a hash table? if so, add it in the hash. + */ + + if (UAO_USES_SWHASH(aobj)) { + /* + * Avoid allocating an entry just to free it again if + * the page had not swap slot in the first place, and + * we are freeing. + */ + struct uao_swhash_elt *elt = + uao_find_swhash_elt(aobj, pageidx, slot ? 
TRUE : FALSE); + if (elt == NULL) { +#ifdef DIAGNOSTIC + if (slot) + panic("uao_set_swslot: didn't create elt"); +#endif + return (0); + } + + oldslot = UAO_SWHASH_ELT_PAGESLOT(elt, pageidx); + UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) = slot; + + /* + * now adjust the elt's reference counter and free it if we've + * dropped it to zero. + */ + + /* an allocation? */ + if (slot) { + if (oldslot == 0) + elt->count++; + } else { /* freeing slot ... */ + if (oldslot) /* to be safe */ + elt->count--; + + if (elt->count == 0) { + LIST_REMOVE(elt, list); + pool_put(&uao_swhash_elt_pool, elt); + } + } + + } else { + /* we are using an array */ + oldslot = aobj->u_swslots[pageidx]; + aobj->u_swslots[pageidx] = slot; + } + return (oldslot); +} + +/* + * end of hash/array functions + */ + +/* + * uao_free: free all resources held by an aobj, and then free the aobj + * + * => the aobj should be dead + */ +static void +uao_free(aobj) + struct uvm_aobj *aobj; +{ + + if (UAO_USES_SWHASH(aobj)) { + int i, hashbuckets = aobj->u_swhashmask + 1; + + /* + * free the swslots from each hash bucket, + * then the hash bucket, and finally the hash table itself. + */ + for (i = 0; i < hashbuckets; i++) { + struct uao_swhash_elt *elt, *next; + + for (elt = aobj->u_swhash[i].lh_first; elt != NULL; + elt = next) { + int j; + + for (j = 0; j < UAO_SWHASH_CLUSTER_SIZE; j++) + { + int slot = elt->slots[j]; + + if (slot) + uvm_swap_free(slot, 1); + } + + next = elt->list.le_next; + pool_put(&uao_swhash_elt_pool, elt); + } + } + FREE(aobj->u_swhash, M_UVMAOBJ); + } else { + int i; + + /* + * free the array + */ + + for (i = 0; i < aobj->u_pages; i++) + { + int slot = aobj->u_swslots[i]; + + if (slot) + uvm_swap_free(slot, 1); + } + FREE(aobj->u_swslots, M_UVMAOBJ); + } + + /* + * finally free the aobj itself + */ + pool_put(&uvm_aobj_pool, aobj); +} + +/* + * pager functions + */ + +/* + * uao_create: create an aobj of the given size and return its uvm_object. + * + * => for normal use, flags are always zero + * => for the kernel object, the flags are: + * UAO_FLAG_KERNOBJ - allocate the kernel object (can only happen once) + * UAO_FLAG_KERNSWAP - enable swapping of kernel object (" ") + */ +struct uvm_object * +uao_create(size, flags) + vsize_t size; + int flags; +{ + static struct uvm_aobj kernel_object_store; /* home of kernel_object */ + static int kobj_alloced = 0; /* not allocated yet */ + int pages = round_page(size) >> PAGE_SHIFT; + struct uvm_aobj *aobj; + + /* + * malloc a new aobj unless we are asked for the kernel object + */ + if (flags & UAO_FLAG_KERNOBJ) { /* want kernel object? */ + if (kobj_alloced) + panic("uao_create: kernel object already allocated"); + + /* + * XXXTHORPEJ: Need to call this now, so the pool gets + * initialized! 
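/*
 * uao_set_swslot above keeps a per-cluster count of live (non-zero)
 * slots so that a hash element can be freed as soon as its last slot is
 * cleared.  A minimal sketch of that bookkeeping on a single cluster
 * element; the element lookup and the kernel's pool allocator are left
 * out, and the names are illustrative:
 */
#include <stdio.h>

#define CLUSTER_SIZE 16

struct cluster_elt {
    int count;                     /* number of non-zero slots */
    int slots[CLUSTER_SIZE];
};

/* store "slot" at index idx (0 means free) and return the old value */
static int
set_slot(struct cluster_elt *elt, int idx, int slot)
{
    int oldslot = elt->slots[idx];

    elt->slots[idx] = slot;
    if (slot != 0) {
        if (oldslot == 0)
            elt->count++;          /* a new live slot */
    } else if (oldslot != 0) {
        elt->count--;              /* a live slot was cleared */
        if (elt->count == 0) {
            /* last live slot gone: the element could now be freed */
            printf("element is now empty\n");
        }
    }
    return oldslot;
}

int
main(void)
{
    struct cluster_elt elt = { 0 };

    set_slot(&elt, 3, 42);         /* assign a swap slot */
    set_slot(&elt, 3, 0);          /* clear it again -> element empty */
    return 0;
}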
+ */ + uao_init(); + + aobj = &kernel_object_store; + aobj->u_pages = pages; + aobj->u_flags = UAO_FLAG_NOSWAP; /* no swap to start */ + /* we are special, we never die */ + aobj->u_obj.uo_refs = UVM_OBJ_KERN; + kobj_alloced = UAO_FLAG_KERNOBJ; + } else if (flags & UAO_FLAG_KERNSWAP) { + aobj = &kernel_object_store; + if (kobj_alloced != UAO_FLAG_KERNOBJ) + panic("uao_create: asked to enable swap on kernel object"); + kobj_alloced = UAO_FLAG_KERNSWAP; + } else { /* normal object */ + aobj = pool_get(&uvm_aobj_pool, PR_WAITOK); + aobj->u_pages = pages; + aobj->u_flags = 0; /* normal object */ + aobj->u_obj.uo_refs = 1; /* start with 1 reference */ + } + + /* + * allocate hash/array if necessary + * + * note: in the KERNSWAP case no need to worry about locking since + * we are still booting we should be the only thread around. + */ + if (flags == 0 || (flags & UAO_FLAG_KERNSWAP) != 0) { + int mflags = (flags & UAO_FLAG_KERNSWAP) != 0 ? + M_NOWAIT : M_WAITOK; + + /* allocate hash table or array depending on object size */ + if (UAO_USES_SWHASH(aobj)) { + aobj->u_swhash = newhashinit(UAO_SWHASH_BUCKETS(aobj), + M_UVMAOBJ, mflags, &aobj->u_swhashmask); + if (aobj->u_swhash == NULL) + panic("uao_create: hashinit swhash failed"); + } else { + MALLOC(aobj->u_swslots, int *, pages * sizeof(int), + M_UVMAOBJ, mflags); + if (aobj->u_swslots == NULL) + panic("uao_create: malloc swslots failed"); + bzero(aobj->u_swslots, pages * sizeof(int)); + } + + if (flags) { + aobj->u_flags &= ~UAO_FLAG_NOSWAP; /* clear noswap */ + return(&aobj->u_obj); + /* done! */ + } + } + + /* + * init aobj fields + */ + simple_lock_init(&aobj->u_obj.vmobjlock); + aobj->u_obj.pgops = &aobj_pager; + TAILQ_INIT(&aobj->u_obj.memq); + aobj->u_obj.uo_npages = 0; + + /* + * now that aobj is ready, add it to the global list + * XXXCHS: uao_init hasn't been called'd in the KERNOBJ case, + * do we really need the kernel object on this list anyway? + */ + simple_lock(&uao_list_lock); + LIST_INSERT_HEAD(&uao_list, aobj, u_list); + simple_unlock(&uao_list_lock); + + /* + * done! + */ + return(&aobj->u_obj); +} + + + +/* + * uao_init: set up aobj pager subsystem + * + * => called at boot time from uvm_pager_init() + */ +static void +uao_init() +{ + static int uao_initialized; + + if (uao_initialized) + return; + uao_initialized = TRUE; + + LIST_INIT(&uao_list); + simple_lock_init(&uao_list_lock); + + /* + * NOTE: Pages fror this pool must not come from a pageable + * kernel map! + */ + pool_init(&uao_swhash_elt_pool, sizeof(struct uao_swhash_elt), + 0, 0, 0, "uaoeltpl", 0, NULL, NULL, M_UVMAOBJ); + + pool_init(&uvm_aobj_pool, sizeof(struct uvm_aobj), 0, 0, 0, + "aobjpl", 0, + pool_page_alloc_nointr, pool_page_free_nointr, M_UVMAOBJ); +} + +/* + * uao_reference: add a ref to an aobj + * + * => aobj must be unlocked (we will lock it) + */ +void +uao_reference(uobj) + struct uvm_object *uobj; +{ + UVMHIST_FUNC("uao_reference"); UVMHIST_CALLED(maphist); + + /* + * kernel_object already has plenty of references, leave it alone. + */ + + if (uobj->uo_refs == UVM_OBJ_KERN) + return; + + simple_lock(&uobj->vmobjlock); + uobj->uo_refs++; /* bump! 
*/ + UVMHIST_LOG(maphist, "<- done (uobj=0x%x, ref = %d)", + uobj, uobj->uo_refs,0,0); + simple_unlock(&uobj->vmobjlock); +} + +/* + * uao_detach: drop a reference to an aobj + * + * => aobj must be unlocked, we will lock it + */ +void +uao_detach(uobj) + struct uvm_object *uobj; +{ + struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; + struct vm_page *pg; + boolean_t busybody; + UVMHIST_FUNC("uao_detach"); UVMHIST_CALLED(maphist); + + /* + * detaching from kernel_object is a noop. + */ + if (uobj->uo_refs == UVM_OBJ_KERN) + return; + + simple_lock(&uobj->vmobjlock); + + UVMHIST_LOG(maphist," (uobj=0x%x) ref=%d", uobj,uobj->uo_refs,0,0); + uobj->uo_refs--; /* drop ref! */ + if (uobj->uo_refs) { /* still more refs? */ + simple_unlock(&uobj->vmobjlock); + UVMHIST_LOG(maphist, "<- done (rc>0)", 0,0,0,0); + return; + } + + /* + * remove the aobj from the global list. + */ + simple_lock(&uao_list_lock); + LIST_REMOVE(aobj, u_list); + simple_unlock(&uao_list_lock); + + /* + * free all the pages that aren't PG_BUSY, mark for release any that are. + */ + + busybody = FALSE; + for (pg = uobj->memq.tqh_first ; pg != NULL ; pg = pg->listq.tqe_next) { + int swslot; + + if (pg->flags & PG_BUSY) { + pg->flags |= PG_RELEASED; + busybody = TRUE; + continue; + } + + + /* zap the mappings, free the swap slot, free the page */ + pmap_page_protect(PMAP_PGARG(pg), VM_PROT_NONE); + + swslot = uao_set_swslot(&aobj->u_obj, + pg->offset >> PAGE_SHIFT, 0); + if (swslot) { + uvm_swap_free(swslot, 1); + } + + uvm_lock_pageq(); + uvm_pagefree(pg); + uvm_unlock_pageq(); + } + + /* + * if we found any busy pages, we're done for now. + * mark the aobj for death, releasepg will finish up for us. + */ + if (busybody) { + aobj->u_flags |= UAO_FLAG_KILLME; + simple_unlock(&aobj->u_obj.vmobjlock); + return; + } + + /* + * finally, free the rest. + */ + uao_free(aobj); +} + +/* + * uao_flush: uh, yea, sure it's flushed. really! + */ +boolean_t +uao_flush(uobj, start, end, flags) + struct uvm_object *uobj; + vaddr_t start, end; + int flags; +{ + + /* + * anonymous memory doesn't "flush" + */ + /* + * XXX + * deal with PGO_DEACTIVATE (for madvise(MADV_SEQUENTIAL)) + * and PGO_FREE (for msync(MSINVALIDATE)) + */ + return TRUE; +} + +/* + * uao_get: fetch me a page + * + * we have three cases: + * 1: page is resident -> just return the page. + * 2: page is zero-fill -> allocate a new page and zero it. + * 3: page is swapped out -> fetch the page from swap. + * + * cases 1 and 2 can be handled with PGO_LOCKED, case 3 cannot. + * so, if the "center" page hits case 3 (or any page, with PGO_ALLPAGES), + * then we will need to return VM_PAGER_UNLOCK. + * + * => prefer map unlocked (not required) + * => object must be locked! we will _unlock_ it before starting any I/O. + * => flags: PGO_ALLPAGES: get all of the pages + * PGO_LOCKED: fault data structures are locked + * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx] + * => NOTE: caller must check for released pages!! 
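/*
 * The three cases listed above reduce to a small decision: a resident
 * page is returned as-is, a page with no swap slot can be allocated and
 * zero-filled on the spot, and only a swapped-out page forces the
 * PGO_LOCKED fast path to give up and request an unlocked retry that
 * can do I/O.  A schematic sketch of that decision; the "resident" and
 * "swslot" inputs stand in for uvm_pagelookup()/uao_find_swslot():
 */
#include <stdio.h>

enum get_result { GET_OK, GET_ZERO_FILL, GET_NEEDS_IO };

static enum get_result
classify_page(int resident, int swslot)
{
    if (resident)
        return GET_OK;            /* case 1: already in memory */
    if (swslot == 0)
        return GET_ZERO_FILL;     /* case 2: fresh page, just zero it */
    return GET_NEEDS_IO;          /* case 3: must page in from swap */
}

int
main(void)
{
    /* under PGO_LOCKED, only GET_NEEDS_IO forces an unlocked retry */
    printf("%d %d %d\n",
        classify_page(1, 0),      /* resident */
        classify_page(0, 0),      /* zero fill */
        classify_page(0, 7));     /* swapped out */
    return 0;
}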
+ */ +static int +uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags) + struct uvm_object *uobj; + vaddr_t offset; + struct vm_page **pps; + int *npagesp; + int centeridx, advice, flags; + vm_prot_t access_type; +{ + struct uvm_aobj *aobj = (struct uvm_aobj *)uobj; + vaddr_t current_offset; + vm_page_t ptmp; + int lcv, gotpages, maxpages, swslot, rv; + boolean_t done; + UVMHIST_FUNC("uao_get"); UVMHIST_CALLED(pdhist); + + UVMHIST_LOG(pdhist, "aobj=%p offset=%d, flags=%d", aobj, offset, flags,0); + + /* + * get number of pages + */ + + maxpages = *npagesp; + + /* + * step 1: handled the case where fault data structures are locked. + */ + + if (flags & PGO_LOCKED) { + + /* + * step 1a: get pages that are already resident. only do + * this if the data structures are locked (i.e. the first + * time through). + */ + + done = TRUE; /* be optimistic */ + gotpages = 0; /* # of pages we got so far */ + + for (lcv = 0, current_offset = offset ; lcv < maxpages ; + lcv++, current_offset += PAGE_SIZE) { + /* do we care about this page? if not, skip it */ + if (pps[lcv] == PGO_DONTCARE) + continue; + + ptmp = uvm_pagelookup(uobj, current_offset); + + /* + * if page is new, attempt to allocate the page, then + * zero-fill it. + */ + if (ptmp == NULL && uao_find_swslot(aobj, + current_offset >> PAGE_SHIFT) == 0) { + ptmp = uvm_pagealloc(uobj, current_offset, + NULL); + if (ptmp) { + /* new page */ + ptmp->flags &= ~(PG_BUSY|PG_FAKE); + ptmp->pqflags |= PQ_AOBJ; + UVM_PAGE_OWN(ptmp, NULL); + uvm_pagezero(ptmp); + } + } + + /* + * to be useful must get a non-busy, non-released page + */ + if (ptmp == NULL || + (ptmp->flags & (PG_BUSY|PG_RELEASED)) != 0) { + if (lcv == centeridx || + (flags & PGO_ALLPAGES) != 0) + /* need to do a wait or I/O! */ + done = FALSE; + continue; + } + + /* + * useful page: busy/lock it and plug it in our + * result array + */ + /* caller must un-busy this page */ + ptmp->flags |= PG_BUSY; + UVM_PAGE_OWN(ptmp, "uao_get1"); + pps[lcv] = ptmp; + gotpages++; + + } /* "for" lcv loop */ + + /* + * step 1b: now we've either done everything needed or we + * to unlock and do some waiting or I/O. + */ + + UVMHIST_LOG(pdhist, "<- done (done=%d)", done, 0,0,0); + + *npagesp = gotpages; + if (done) + /* bingo! */ + return(VM_PAGER_OK); + else + /* EEK! Need to unlock and I/O */ + return(VM_PAGER_UNLOCK); + } + + /* + * step 2: get non-resident or busy pages. + * object is locked. data structures are unlocked. + */ + + for (lcv = 0, current_offset = offset ; lcv < maxpages ; + lcv++, current_offset += PAGE_SIZE) { + /* + * - skip over pages we've already gotten or don't want + * - skip over pages we don't _have_ to get + */ + if (pps[lcv] != NULL || + (lcv != centeridx && (flags & PGO_ALLPAGES) == 0)) + continue; + + /* + * we have yet to locate the current page (pps[lcv]). we + * first look for a page that is already at the current offset. + * if we find a page, we check to see if it is busy or + * released. if that is the case, then we sleep on the page + * until it is no longer busy or released and repeat the lookup. + * if the page we found is neither busy nor released, then we + * busy it (so we own it) and plug it into pps[lcv]. this + * 'break's the following while loop and indicates we are + * ready to move on to the next page in the "lcv" loop above. + * + * if we exit the while loop with pps[lcv] still set to NULL, + * then it means that we allocated a new busy/fake/clean page + * ptmp in the object and we need to do I/O to fill in the data. 
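/*
 * The retry loop described above (and implemented just below) is a
 * lookup-or-allocate loop: look the page up; if it is missing, allocate
 * one, sleeping and retrying when allocation fails; if it exists but is
 * busy, sleep until it is released and re-check from the top; otherwise
 * claim it.  A standalone skeleton of that control flow, with the VM
 * calls, locking and PG_WANTED handling replaced by illustrative stubs:
 */
#include <stdio.h>

struct page { int busy; };

static struct page the_page = { 1 };   /* simulated page, starts busy */
static int resident = 1;               /* simulated residency */

static struct page *page_lookup(void) { return resident ? &the_page : NULL; }

static struct page *
page_alloc(void)
{
    resident = 1;                      /* pretend the allocation worked */
    the_page.busy = 0;
    return &the_page;
}

static void wait_for_memory(void)  { printf("out of memory: sleeping\n"); }
static void
wait_until_unbusy(struct page *pg)
{
    printf("page busy: sleeping\n");
    pg->busy = 0;                      /* pretend the owner released it */
}

static struct page *
get_or_alloc_page(void)
{
    struct page *pg;

    for (;;) {
        pg = page_lookup();
        if (pg == NULL) {
            pg = page_alloc();
            if (pg == NULL) {          /* out of RAM: wait and retry */
                wait_for_memory();
                continue;
            }
            return pg;                 /* fresh page: caller does the I/O */
        }
        if (pg->busy) {                /* someone else owns it */
            wait_until_unbusy(pg);
            continue;                  /* it may have changed: retry */
        }
        pg->busy = 1;                  /* claim it for ourselves */
        return pg;
    }
}

int
main(void)
{
    return get_or_alloc_page() == &the_page ? 0 : 1;
}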
+ */ + + /* top of "pps" while loop */ + while (pps[lcv] == NULL) { + /* look for a resident page */ + ptmp = uvm_pagelookup(uobj, current_offset); + + /* not resident? allocate one now (if we can) */ + if (ptmp == NULL) { + + ptmp = uvm_pagealloc(uobj, current_offset, + NULL); /* alloc */ + + /* out of RAM? */ + if (ptmp == NULL) { + simple_unlock(&uobj->vmobjlock); + UVMHIST_LOG(pdhist, + "sleeping, ptmp == NULL\n",0,0,0,0); + uvm_wait("uao_getpage"); + simple_lock(&uobj->vmobjlock); + /* goto top of pps while loop */ + continue; + } + + /* + * safe with PQ's unlocked: because we just + * alloc'd the page + */ + ptmp->pqflags |= PQ_AOBJ; + + /* + * got new page ready for I/O. break pps while + * loop. pps[lcv] is still NULL. + */ + break; + } + + /* page is there, see if we need to wait on it */ + if ((ptmp->flags & (PG_BUSY|PG_RELEASED)) != 0) { + ptmp->flags |= PG_WANTED; + UVMHIST_LOG(pdhist, + "sleeping, ptmp->flags 0x%x\n", + ptmp->flags,0,0,0); + UVM_UNLOCK_AND_WAIT(ptmp, &uobj->vmobjlock, 0, + "uao_get", 0); + simple_lock(&uobj->vmobjlock); + continue; /* goto top of pps while loop */ + } + + /* + * if we get here then the page has become resident and + * unbusy between steps 1 and 2. we busy it now (so we + * own it) and set pps[lcv] (so that we exit the while + * loop). + */ + /* we own it, caller must un-busy */ + ptmp->flags |= PG_BUSY; + UVM_PAGE_OWN(ptmp, "uao_get2"); + pps[lcv] = ptmp; + } + + /* + * if we own the valid page at the correct offset, pps[lcv] will + * point to it. nothing more to do except go to the next page. + */ + if (pps[lcv]) + continue; /* next lcv */ + + /* + * we have a "fake/busy/clean" page that we just allocated. + * do the needed "i/o", either reading from swap or zeroing. + */ + swslot = uao_find_swslot(aobj, current_offset >> PAGE_SHIFT); + + /* + * just zero the page if there's nothing in swap. + */ + if (swslot == 0) + { + /* + * page hasn't existed before, just zero it. + */ + uvm_pagezero(ptmp); + } + else + { + UVMHIST_LOG(pdhist, "pagein from swslot %d", + swslot, 0,0,0); + + /* + * page in the swapped-out page. + * unlock object for i/o, relock when done. + */ + simple_unlock(&uobj->vmobjlock); + rv = uvm_swap_get(ptmp, swslot, PGO_SYNCIO); + simple_lock(&uobj->vmobjlock); + + /* + * I/O done. check for errors. + */ + if (rv != VM_PAGER_OK) + { + UVMHIST_LOG(pdhist, "<- done (error=%d)", + rv,0,0,0); + if (ptmp->flags & PG_WANTED) + /* object lock still held */ + thread_wakeup(ptmp); + ptmp->flags &= ~(PG_WANTED|PG_BUSY); + UVM_PAGE_OWN(ptmp, NULL); + uvm_lock_pageq(); + uvm_pagefree(ptmp); + uvm_unlock_pageq(); + simple_unlock(&uobj->vmobjlock); + return (rv); + } + } + + /* + * we got the page! clear the fake flag (indicates valid + * data now in page) and plug into our result array. note + * that page is still busy. + * + * it is the callers job to: + * => check if the page is released + * => unbusy the page + * => activate the page + */ + + ptmp->flags &= ~PG_FAKE; /* data is valid ... */ + pmap_clear_modify(PMAP_PGARG(ptmp)); /* ... and clean */ + pps[lcv] = ptmp; + + } /* lcv loop */ + + /* + * finally, unlock object and return. + */ + + simple_unlock(&uobj->vmobjlock); + UVMHIST_LOG(pdhist, "<- done (OK)",0,0,0,0); + return(VM_PAGER_OK); +} + +/* + * uao_releasepg: handle released page in an aobj + * + * => "pg" is a PG_BUSY [caller owns it], PG_RELEASED page that we need + * to dispose of. 
+ * => caller must handle PG_WANTED case + * => called with page's object locked, pageq's unlocked + * => returns TRUE if page's object is still alive, FALSE if we + * killed the page's object. if we return TRUE, then we + * return with the object locked. + * => if (nextpgp != NULL) => we return pageq.tqe_next here, and return + * with the page queues locked [for pagedaemon] + * => if (nextpgp == NULL) => we return with page queues unlocked [normal case] + * => we kill the aobj if it is not referenced and we are suppose to + * kill it ("KILLME"). + */ +static boolean_t uao_releasepg(pg, nextpgp) + struct vm_page *pg; + struct vm_page **nextpgp; /* OUT */ +{ + struct uvm_aobj *aobj = (struct uvm_aobj *) pg->uobject; + int slot; + +#ifdef DIAGNOSTIC + if ((pg->flags & PG_RELEASED) == 0) + panic("uao_releasepg: page not released!"); +#endif + + /* + * dispose of the page [caller handles PG_WANTED] and swap slot. + */ + pmap_page_protect(PMAP_PGARG(pg), VM_PROT_NONE); + slot = uao_set_swslot(&aobj->u_obj, pg->offset >> PAGE_SHIFT, 0); + if (slot) + uvm_swap_free(slot, 1); + uvm_lock_pageq(); + if (nextpgp) + *nextpgp = pg->pageq.tqe_next; /* next page for daemon */ + uvm_pagefree(pg); + if (!nextpgp) + uvm_unlock_pageq(); /* keep locked for daemon */ + + /* + * if we're not killing the object, we're done. + */ + if ((aobj->u_flags & UAO_FLAG_KILLME) == 0) + return TRUE; + +#ifdef DIAGNOSTIC + if (aobj->u_obj.uo_refs) + panic("uvm_km_releasepg: kill flag set on referenced object!"); +#endif + + /* + * if there are still pages in the object, we're done for now. + */ + if (aobj->u_obj.uo_npages != 0) + return TRUE; + +#ifdef DIAGNOSTIC + if (aobj->u_obj.memq.tqh_first) + panic("uvn_releasepg: pages in object with npages == 0"); +#endif + + /* + * finally, free the rest. + */ + uao_free(aobj); + + return FALSE; +} diff --git a/sys/uvm/uvm_aobj.h b/sys/uvm/uvm_aobj.h new file mode 100644 index 00000000000..61beadb2157 --- /dev/null +++ b/sys/uvm/uvm_aobj.h @@ -0,0 +1,77 @@ +/* $NetBSD: uvm_aobj.h,v 1.6 1998/02/12 07:36:45 chs Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and + * Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * from: Id: uvm_aobj.h,v 1.1.2.4 1998/02/06 05:19:28 chs Exp + */ +/* + * uvm_aobj.h: anonymous memory uvm_object pager + * + * author: Chuck Silvers <chuq@chuq.com> + * started: Jan-1998 + * + * - design mostly from Chuck Cranor + */ + +#ifndef _UVM_UVM_AOBJ_H_ +#define _UVM_UVM_AOBJ_H_ + +/* + * flags + */ + +/* flags for uao_create: can only be used one time (at bootup) */ +#define UAO_FLAG_KERNOBJ 0x1 /* create kernel object */ +#define UAO_FLAG_KERNSWAP 0x2 /* enable kernel swap */ + +/* internal flags */ +#define UAO_FLAG_KILLME 0x4 /* aobj should die when last released + * page is no longer PG_BUSY ... */ +#define UAO_FLAG_NOSWAP 0x8 /* aobj can't swap (kernel obj only!) */ + +/* + * prototypes + */ + +int uao_set_swslot __P((struct uvm_object *, int, int)); + +/* + * globals + */ + +extern struct uvm_pagerops aobj_pager; + +#endif /* _UVM_UVM_AOBJ_H_ */ diff --git a/sys/uvm/uvm_ddb.h b/sys/uvm/uvm_ddb.h new file mode 100644 index 00000000000..7c82bdf0dd6 --- /dev/null +++ b/sys/uvm/uvm_ddb.h @@ -0,0 +1,56 @@ +/* $NetBSD: uvm_ddb.h,v 1.1 1998/07/04 22:18:53 jonathan Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * from: Id: uvm_extern.h,v 1.1.2.21 1998/02/07 01:16:53 chs Exp + */ + +#ifndef _UVM_UVM_DDB_H_ +#define _UVM_UVM_DDB_H_ + +#if defined(DDB) +void uvm_map_print __P((vm_map_t, boolean_t)); +void uvm_map_printit __P((vm_map_t, boolean_t, + int (*) __P((const char *, ...)))); + +void uvm_object_print __P((struct uvm_object *, boolean_t)); +void uvm_object_printit __P((struct uvm_object *, boolean_t, + int (*) __P((const char *, ...)))); +void uvm_page_print __P((struct vm_page *, boolean_t)); +void uvm_page_printit __P((struct vm_page *, boolean_t, + int (*) __P((const char *, ...)))); +#endif +#endif _UVM_UVM_DDB_H_ diff --git a/sys/uvm/uvm_device.c b/sys/uvm/uvm_device.c new file mode 100644 index 00000000000..6c249c42877 --- /dev/null +++ b/sys/uvm/uvm_device.c @@ -0,0 +1,507 @@ +/* $NetBSD: uvm_device.c,v 1.11 1998/11/19 05:23:26 mrg Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ + +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * from: Id: uvm_device.c,v 1.1.2.9 1998/02/06 05:11:47 chs Exp + */ + +/* + * uvm_device.c: the device pager. + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/conf.h> +#include <sys/proc.h> +#include <sys/malloc.h> +#include <sys/vnode.h> + +#include <vm/vm.h> +#include <vm/vm_page.h> +#include <vm/vm_kern.h> + +#include <uvm/uvm.h> +#include <uvm/uvm_device.h> + +/* + * private global data structure + * + * we keep a list of active device objects in the system. 
+ */ + +LIST_HEAD(udv_list_struct, uvm_device); +static struct udv_list_struct udv_list; +static simple_lock_data_t udv_lock; + +/* + * functions + */ + +static void udv_init __P((void)); +struct uvm_object *udv_attach __P((void *, vm_prot_t)); +static void udv_reference __P((struct uvm_object *)); +static void udv_detach __P((struct uvm_object *)); +static int udv_fault __P((struct uvm_faultinfo *, vaddr_t, + vm_page_t *, int, int, vm_fault_t, + vm_prot_t, int)); +static boolean_t udv_flush __P((struct uvm_object *, vaddr_t, + vaddr_t, int)); +static int udv_asyncget __P((struct uvm_object *, vaddr_t, + int)); +static int udv_put __P((struct uvm_object *, vm_page_t *, + int, boolean_t)); + +/* + * master pager structure + */ + +struct uvm_pagerops uvm_deviceops = { + udv_init, + udv_attach, + udv_reference, + udv_detach, + udv_fault, + udv_flush, + NULL, /* no get function since we have udv_fault */ + udv_asyncget, + udv_put, + NULL, /* no cluster function */ + NULL, /* no put cluster function */ + NULL, /* no share protect. no share maps for us */ + NULL, /* no AIO-DONE function since no async i/o */ + NULL, /* no releasepg function since no normal pages */ +}; + +/* + * the ops! + */ + +/* + * udv_init + * + * init pager private data structures. + */ + +void +udv_init() +{ + + LIST_INIT(&udv_list); + simple_lock_init(&udv_lock); +} + +/* + * udv_attach + * + * get a VM object that is associated with a device. allocate a new + * one if needed. + * + * => caller must _not_ already be holding the lock on the uvm_object. + * => in fact, nothing should be locked so that we can sleep here. + */ +struct uvm_object * +udv_attach(arg, accessprot) + void *arg; + vm_prot_t accessprot; +{ + dev_t device = *((dev_t *) arg); + struct uvm_device *udv, *lcv; + int (*mapfn) __P((dev_t, int, int)); + UVMHIST_FUNC("udv_attach"); UVMHIST_CALLED(maphist); + + UVMHIST_LOG(maphist, "(device=0x%x)", device,0,0,0); + + /* + * before we do anything, ensure this device supports mmap + */ + + mapfn = cdevsw[major(device)].d_mmap; + if (mapfn == NULL || + mapfn == (int (*) __P((dev_t, int, int))) enodev || + mapfn == (int (*) __P((dev_t, int, int))) nullop) + return(NULL); + + /* + * keep looping until we get it + */ + + while (1) { + + /* + * first, attempt to find it on the main list + */ + + simple_lock(&udv_lock); + for (lcv = udv_list.lh_first ; lcv != NULL ; lcv = lcv->u_list.le_next) { + if (device == lcv->u_device) + break; + } + + /* + * got it on main list. put a hold on it and unlock udv_lock. + */ + + if (lcv) { + + /* + * if someone else has a hold on it, sleep and start + * over again. + */ + + if (lcv->u_flags & UVM_DEVICE_HOLD) { + lcv->u_flags |= UVM_DEVICE_WANTED; + UVM_UNLOCK_AND_WAIT(lcv, &udv_lock, FALSE, + "udv_attach",0); + continue; + } + + /* we are now holding it */ + lcv->u_flags |= UVM_DEVICE_HOLD; + simple_unlock(&udv_lock); + + /* + * bump reference count, unhold, return. + */ + + simple_lock(&lcv->u_obj.vmobjlock); + lcv->u_obj.uo_refs++; + simple_unlock(&lcv->u_obj.vmobjlock); + + simple_lock(&udv_lock); + if (lcv->u_flags & UVM_DEVICE_WANTED) + wakeup(lcv); + lcv->u_flags &= ~(UVM_DEVICE_WANTED|UVM_DEVICE_HOLD); + simple_unlock(&udv_lock); + return(&lcv->u_obj); + } + + /* + * did not find it on main list. need to malloc a new one. 
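/*
 * udv_attach cannot allocate while holding udv_lock (the malloc may
 * sleep), so the code below drops the lock, allocates, re-takes the
 * lock and then re-checks the list: if another thread attached the same
 * device in the meantime, the fresh node is discarded.  A pthread-based
 * sketch of that drop-lock/allocate/re-check pattern; the list, the
 * integer key and the error handling are illustrative simplifications:
 */
#include <pthread.h>
#include <stdlib.h>

struct node {
    int key;
    struct node *next;
};

static struct node *list_head;
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

static struct node *
list_find(int key)
{
    struct node *n;

    for (n = list_head; n != NULL; n = n->next)
        if (n->key == key)
            return n;
    return NULL;
}

/* find the node for "key", creating it if needed */
static struct node *
attach(int key)
{
    struct node *n, *fresh;

    pthread_mutex_lock(&list_lock);
    n = list_find(key);
    if (n != NULL) {
        pthread_mutex_unlock(&list_lock);
        return n;                          /* already there */
    }
    /* not found: the allocation may sleep, so drop the lock first */
    pthread_mutex_unlock(&list_lock);
    fresh = malloc(sizeof(*fresh));
    if (fresh == NULL)
        return NULL;
    pthread_mutex_lock(&list_lock);

    /* someone may have added it while we slept: re-check */
    n = list_find(key);
    if (n != NULL) {
        pthread_mutex_unlock(&list_lock);
        free(fresh);                       /* lost the race */
        return n;
    }
    fresh->key = key;
    fresh->next = list_head;
    list_head = fresh;
    pthread_mutex_unlock(&list_lock);
    return fresh;
}

int
main(void)
{
    return attach(7) == attach(7) ? 0 : 1; /* same node both times */
}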
+ */ + + simple_unlock(&udv_lock); + /* NOTE: we could sleep in the following malloc() */ + MALLOC(udv, struct uvm_device *, sizeof(*udv), M_TEMP, M_WAITOK); + simple_lock(&udv_lock); + + /* + * now we have to double check to make sure no one added it + * to the list while we were sleeping... + */ + + for (lcv = udv_list.lh_first ; lcv != NULL ; + lcv = lcv->u_list.le_next) { + if (device == lcv->u_device) + break; + } + + /* + * did we lose a race to someone else? free our memory and retry. + */ + + if (lcv) { + simple_unlock(&udv_lock); + FREE(udv, M_TEMP); + continue; + } + + /* + * we have it! init the data structures, add to list + * and return. + */ + + simple_lock_init(&udv->u_obj.vmobjlock); + udv->u_obj.pgops = &uvm_deviceops; + TAILQ_INIT(&udv->u_obj.memq); /* not used, but be safe */ + udv->u_obj.uo_npages = 0; + udv->u_obj.uo_refs = 1; + udv->u_flags = 0; + udv->u_device = device; + LIST_INSERT_HEAD(&udv_list, udv, u_list); + simple_unlock(&udv_lock); + + return(&udv->u_obj); + + } /* while(1) loop */ + + /*NOTREACHED*/ +} + +/* + * udv_reference + * + * add a reference to a VM object. Note that the reference count must + * already be one (the passed in reference) so there is no chance of the + * udv being released or locked out here. + * + * => caller must call with object unlocked. + */ + +static void +udv_reference(uobj) + struct uvm_object *uobj; +{ + UVMHIST_FUNC("udv_reference"); UVMHIST_CALLED(maphist); + + simple_lock(&uobj->vmobjlock); + uobj->uo_refs++; + UVMHIST_LOG(maphist, "<- done (uobj=0x%x, ref = %d)", + uobj, uobj->uo_refs,0,0); + simple_unlock(&uobj->vmobjlock); +} + +/* + * udv_detach + * + * remove a reference to a VM object. + * + * => caller must call with object unlocked and map locked. + */ + +static void +udv_detach(uobj) + struct uvm_object *uobj; +{ + struct uvm_device *udv = (struct uvm_device *) uobj; + UVMHIST_FUNC("udv_detach"); UVMHIST_CALLED(maphist); + + /* + * loop until done + */ + + while (1) { + simple_lock(&uobj->vmobjlock); + + if (uobj->uo_refs > 1) { + uobj->uo_refs--; /* drop ref! */ + simple_unlock(&uobj->vmobjlock); + UVMHIST_LOG(maphist," <- done, uobj=0x%x, ref=%d", + uobj,uobj->uo_refs,0,0); + return; + } + +#ifdef DIAGNOSTIC + if (uobj->uo_npages || uobj->memq.tqh_first) + panic("udv_detach: pages in a device object?"); +#endif + + /* + * now lock udv_lock + */ + simple_lock(&udv_lock); + + /* + * is it being held? if so, wait until others are done. + */ + if (udv->u_flags & UVM_DEVICE_HOLD) { + + /* + * want it + */ + udv->u_flags |= UVM_DEVICE_WANTED; + simple_unlock(&uobj->vmobjlock); + UVM_UNLOCK_AND_WAIT(udv, &udv_lock, FALSE, "udv_detach",0); + continue; + } + + /* + * got it! nuke it now. + */ + + LIST_REMOVE(udv, u_list); + if (udv->u_flags & UVM_DEVICE_WANTED) + wakeup(udv); + FREE(udv, M_TEMP); + break; /* DONE! */ + + } /* while (1) loop */ + + UVMHIST_LOG(maphist," <- done, freed uobj=0x%x", uobj,0,0,0); + return; +} + + +/* + * udv_flush + * + * flush pages out of a uvm object. a no-op for devices. + */ + +static boolean_t udv_flush(uobj, start, stop, flags) + struct uvm_object *uobj; + vaddr_t start, stop; + int flags; +{ + + return(TRUE); +} + +/* + * udv_fault: non-standard fault routine for device "pages" + * + * => rather than having a "get" function, we have a fault routine + * since we don't return vm_pages we need full control over the + * pmap_enter map in + * => all the usual fault data structured are locked by the caller + * (i.e. 
maps(read), amap (if any), uobj) + * => on return, we unlock all fault data structures + * => flags: PGO_ALLPAGES: get all of the pages + * PGO_LOCKED: fault data structures are locked + * XXX: currently PGO_LOCKED is always required ... consider removing + * it as a flag + * => NOTE: vaddr is the VA of pps[0] in ufi->entry, _NOT_ pps[centeridx] + */ + +static int +udv_fault(ufi, vaddr, pps, npages, centeridx, fault_type, access_type, flags) + struct uvm_faultinfo *ufi; + vaddr_t vaddr; + vm_page_t *pps; + int npages, centeridx, flags; + vm_fault_t fault_type; + vm_prot_t access_type; +{ + struct vm_map_entry *entry = ufi->entry; + struct uvm_object *uobj = entry->object.uvm_obj; + struct uvm_device *udv = (struct uvm_device *)uobj; + vaddr_t curr_offset, curr_va; + paddr_t paddr; + int lcv, retval, mdpgno; + dev_t device; + int (*mapfn) __P((dev_t, int, int)); + UVMHIST_FUNC("udv_fault"); UVMHIST_CALLED(maphist); + UVMHIST_LOG(maphist," flags=%d", flags,0,0,0); + + /* + * XXX: !PGO_LOCKED calls are currently not allowed (or used) + */ + + if ((flags & PGO_LOCKED) == 0) + panic("udv_fault: !PGO_LOCKED fault"); + + /* + * we do not allow device mappings to be mapped copy-on-write + * so we kill any attempt to do so here. + */ + + if (UVM_ET_ISCOPYONWRITE(entry)) { + UVMHIST_LOG(maphist, "<- failed -- COW entry (etype=0x%x)", + entry->etype, 0,0,0); + uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj, NULL); + return(VM_PAGER_ERROR); + } + + /* + * get device map function. + */ + device = udv->u_device; + mapfn = cdevsw[major(device)].d_mmap; + + /* + * now we must determine the offset in udv to use and the VA to + * use for pmap_enter. note that we always use orig_map's pmap + * for pmap_enter (even if we have a submap). since virtual + * addresses in a submap must match the main map, this is ok. + */ + /* udv offset = (offset from start of entry) + entry's offset */ + curr_offset = (vaddr - entry->start) + entry->offset; + /* pmap va = vaddr (virtual address of pps[0]) */ + curr_va = vaddr; + + /* + * loop over the page range entering in as needed + */ + + retval = VM_PAGER_OK; + for (lcv = 0 ; lcv < npages ; lcv++, curr_offset += PAGE_SIZE, + curr_va += PAGE_SIZE) { + if ((flags & PGO_ALLPAGES) == 0 && lcv != centeridx) + continue; + + if (pps[lcv] == PGO_DONTCARE) + continue; + + mdpgno = (*mapfn)(device, (int)curr_offset, access_type); + if (mdpgno == -1) { + retval = VM_PAGER_ERROR; + break; + } + paddr = pmap_phys_address(mdpgno); + UVMHIST_LOG(maphist, + " MAPPING: device: pm=0x%x, va=0x%x, pa=0x%x, at=%d", + ufi->orig_map->pmap, curr_va, (int)paddr, access_type); + pmap_enter(ufi->orig_map->pmap, curr_va, paddr, access_type, 0); + + } + + uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj, NULL); + return(retval); +} + +/* + * udv_asyncget: start async I/O to bring pages into ram + * + * => caller must lock object(???XXX: see if this is best) + * => a no-op for devices + */ + +static int +udv_asyncget(uobj, offset, npages) + struct uvm_object *uobj; + vaddr_t offset; + int npages; +{ + + return(KERN_SUCCESS); +} + +/* + * udv_put: flush page data to backing store. 
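/*
 * The fault loop in udv_fault above walks the faulting range one page
 * at a time: the device offset starts at (vaddr - entry->start) +
 * entry->offset, and both the offset and the mapping VA advance by one
 * page per iteration while the driver's d_mmap hook is asked for each
 * physical page.  An arithmetic-only sketch of that walk; PAGE_SIZE and
 * the entry values are example numbers:
 */
#include <stdio.h>

#define PAGE_SIZE 4096UL

int
main(void)
{
    unsigned long entry_start  = 0x20000000UL;  /* VA where the entry begins */
    unsigned long entry_offset = 0x8000UL;      /* device offset of entry_start */
    unsigned long vaddr        = 0x20003000UL;  /* VA of the faulting page */
    int npages = 3, lcv;

    unsigned long curr_offset = (vaddr - entry_start) + entry_offset;
    unsigned long curr_va     = vaddr;

    for (lcv = 0; lcv < npages; lcv++,
        curr_offset += PAGE_SIZE, curr_va += PAGE_SIZE)
        printf("map VA 0x%lx to device offset 0x%lx\n",
            curr_va, curr_offset);
    return 0;
}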
+ * + * => this function should never be called (since we never have any + * page structures to "put") + */ + +static int +udv_put(uobj, pps, npages, flags) + struct uvm_object *uobj; + struct vm_page **pps; + int npages, flags; +{ + + panic("udv_put: trying to page out to a device!"); +} diff --git a/sys/uvm/uvm_device.h b/sys/uvm/uvm_device.h new file mode 100644 index 00000000000..347e4cb1dac --- /dev/null +++ b/sys/uvm/uvm_device.h @@ -0,0 +1,76 @@ +/* $NetBSD: uvm_device.h,v 1.5 1998/03/09 00:58:56 mrg Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * from: Id: uvm_device.h,v 1.1.2.2 1997/10/03 17:39:44 chuck Exp + */ + +#ifndef _UVM_UVM_DEVICE_H_ +#define _UVM_UVM_DEVICE_H_ + +/* + * uvm_device.h + * + * device handle into the VM system. + */ + +/* + * the uvm_device structure. object is put at the top of the data structure. + * this allows: + * (struct uvm_device *) == (struct uvm_object *) + */ + +struct uvm_device { + struct uvm_object u_obj; /* the actual VM object */ + int u_flags; /* flags [LOCKED BY UDV_LOCK!] */ + dev_t u_device; /* our device */ + LIST_ENTRY(uvm_device) u_list; /* list of device objects */ +}; + +/* + * u_flags values + */ + +#define UVM_DEVICE_HOLD 0x1 /* someone has a "hold" on it */ +#define UVM_DEVICE_WANTED 0x2 /* someone wants to put a "hold" on */ + +/* + * prototypes + */ + +struct uvm_object *udv_attach __P((void *, vm_prot_t)); + +#endif /* _UVM_UVM_DEVICE_H_ */ diff --git a/sys/uvm/uvm_extern.h b/sys/uvm/uvm_extern.h new file mode 100644 index 00000000000..bcec521f665 --- /dev/null +++ b/sys/uvm/uvm_extern.h @@ -0,0 +1,386 @@ +/* $NetBSD: uvm_extern.h,v 1.21 1998/09/08 23:44:21 thorpej Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! 
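/*
 * Both struct uvm_device above and struct uvm_aobj in uvm_aobj.c place
 * their struct uvm_object first, so a pointer to the specialised
 * structure and a pointer to its embedded object refer to the same
 * address and the pager can cast between them.  A minimal sketch of
 * that embed-the-base-first idiom; the types here are illustrative:
 */
#include <stdio.h>

struct object {                   /* the generic part, always first */
    int refs;
};

struct device_object {
    struct object obj;            /* must remain the first member */
    int unit;
};

/* generic code only sees struct object ... */
static void
object_ref(struct object *o)
{
    o->refs++;
}

int
main(void)
{
    struct device_object d = { { 0 }, 3 };
    struct device_object *back;

    object_ref(&d.obj);                           /* pass the embedded base */
    back = (struct device_object *)&d.obj;        /* same address as &d */
    printf("unit %d has %d ref(s)\n", back->unit, back->obj.refs);
    return 0;
}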
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * from: Id: uvm_extern.h,v 1.1.2.21 1998/02/07 01:16:53 chs Exp + */ + +#ifndef _UVM_UVM_EXTERN_H_ +#define _UVM_UVM_EXTERN_H_ + +/* + * uvm_extern.h: this file defines the external interface to the VM system. + * + * this should be the only file included by non-VM parts of the kernel + * which need access to VM services. if you want to know the interface + * to the MI VM layer without knowing the details, this is the file to + * learn. + * + * NOTE: vm system calls are prototyped in syscallargs.h + */ + +/* + * defines + */ + +/* + * the following defines are for uvm_map and functions which call it. 
+ */ + +/* protections bits */ +#define UVM_PROT_MASK 0x07 /* protection mask */ +#define UVM_PROT_NONE 0x00 /* protection none */ +#define UVM_PROT_ALL 0x07 /* everything */ +#define UVM_PROT_READ 0x01 /* read */ +#define UVM_PROT_WRITE 0x02 /* write */ +#define UVM_PROT_EXEC 0x04 /* exec */ + +/* protection short codes */ +#define UVM_PROT_R 0x01 /* read */ +#define UVM_PROT_W 0x02 /* write */ +#define UVM_PROT_RW 0x03 /* read-write */ +#define UVM_PROT_X 0x04 /* exec */ +#define UVM_PROT_RX 0x05 /* read-exec */ +#define UVM_PROT_WX 0x06 /* write-exec */ +#define UVM_PROT_RWX 0x07 /* read-write-exec */ + +/* 0x08: not used */ + +/* inherit codes */ +#define UVM_INH_MASK 0x30 /* inherit mask */ +#define UVM_INH_SHARE 0x00 /* "share" */ +#define UVM_INH_COPY 0x10 /* "copy" */ +#define UVM_INH_NONE 0x20 /* "none" */ +#define UVM_INH_DONATE 0x30 /* "donate" << not used */ + +/* 0x40, 0x80: not used */ + +/* bits 0x700: max protection, 0x800: not used */ + +/* bits 0x7000: advice, 0x8000: not used */ +/* advice: matches MADV_* from sys/mman.h */ +#define UVM_ADV_NORMAL 0x0 /* 'normal' */ +#define UVM_ADV_RANDOM 0x1 /* 'random' */ +#define UVM_ADV_SEQUENTIAL 0x2 /* 'sequential' */ +/* 0x3: will need, 0x4: dontneed */ +#define UVM_ADV_MASK 0x7 /* mask */ + +/* mapping flags */ +#define UVM_FLAG_FIXED 0x010000 /* find space */ +#define UVM_FLAG_OVERLAY 0x020000 /* establish overlay */ +#define UVM_FLAG_NOMERGE 0x040000 /* don't merge map entries */ +#define UVM_FLAG_COPYONW 0x080000 /* set copy_on_write flag */ +#define UVM_FLAG_AMAPPAD 0x100000 /* for bss: pad amap to reduce malloc() */ +#define UVM_FLAG_TRYLOCK 0x200000 /* fail if we can not lock map */ + +/* macros to extract info */ +#define UVM_PROTECTION(X) ((X) & UVM_PROT_MASK) +#define UVM_INHERIT(X) (((X) & UVM_INH_MASK) >> 4) +#define UVM_MAXPROTECTION(X) (((X) >> 8) & UVM_PROT_MASK) +#define UVM_ADVICE(X) (((X) >> 12) & UVM_ADV_MASK) + +#define UVM_MAPFLAG(PROT,MAXPROT,INH,ADVICE,FLAGS) \ + ((MAXPROT << 8)|(PROT)|(INH)|((ADVICE) << 12)|(FLAGS)) + +/* magic offset value */ +#define UVM_UNKNOWN_OFFSET ((vaddr_t) -1) + /* offset not known(obj) or don't care(!obj) */ + +/* + * the following defines are for uvm_km_kmemalloc's flags + */ + +#define UVM_KMF_NOWAIT 0x1 /* matches M_NOWAIT */ +#define UVM_KMF_VALLOC 0x2 /* allocate VA only */ +#define UVM_KMF_TRYLOCK UVM_FLAG_TRYLOCK /* try locking only */ + +/* + * the following defines the strategies for uvm_pagealloc_strat() + */ +#define UVM_PGA_STRAT_NORMAL 0 /* high -> low free list walk */ +#define UVM_PGA_STRAT_ONLY 1 /* only specified free list */ +#define UVM_PGA_STRAT_FALLBACK 2 /* ONLY falls back on NORMAL */ + +/* + * structures + */ + +struct core; +struct mount; +struct pglist; +struct proc; +struct ucred; +struct uio; +struct uvm_object; +struct vm_anon; +struct vmspace; +struct pmap; +struct vnode; + +/* + * uvmexp: global data structures that are exported to parts of the kernel + * other than the vm system. 
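Editor's aside on the flag encoding above (not part of the imported diff): UVM_MAPFLAG packs protection, maximum protection, inheritance, advice and the mapping flags into a single word, and the extraction macros recover each field. The following userspace sketch copies just those macros from the header above to show the round trip; the chosen values are arbitrary.

/*
 * Editor's sketch: packing and unpacking a uvm_map() flag word.
 * Macro definitions are copied from uvm_extern.h above; main() and
 * the example values are illustrative only.
 */
#include <stdio.h>

#define UVM_PROT_MASK	0x07
#define UVM_PROT_RW	0x03
#define UVM_PROT_RWX	0x07
#define UVM_INH_COPY	0x10
#define UVM_INH_MASK	0x30
#define UVM_ADV_RANDOM	0x1
#define UVM_ADV_MASK	0x7
#define UVM_FLAG_COPYONW 0x080000

#define UVM_MAPFLAG(PROT,MAXPROT,INH,ADVICE,FLAGS) \
	((MAXPROT << 8)|(PROT)|(INH)|((ADVICE) << 12)|(FLAGS))
#define UVM_PROTECTION(X)	((X) & UVM_PROT_MASK)
#define UVM_INHERIT(X)		(((X) & UVM_INH_MASK) >> 4)
#define UVM_MAXPROTECTION(X)	(((X) >> 8) & UVM_PROT_MASK)
#define UVM_ADVICE(X)		(((X) >> 12) & UVM_ADV_MASK)

int
main(void)
{
	unsigned f = UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RWX, UVM_INH_COPY,
	    UVM_ADV_RANDOM, UVM_FLAG_COPYONW);

	printf("flags     = 0x%x\n", f);			/* 0x81713 */
	printf("prot      = 0x%x\n", UVM_PROTECTION(f));	/* 0x3 (RW) */
	printf("maxprot   = 0x%x\n", UVM_MAXPROTECTION(f));	/* 0x7 (RWX) */
	printf("inherit   = 0x%x\n", UVM_INHERIT(f));		/* 0x1 (copy) */
	printf("advice    = 0x%x\n", UVM_ADVICE(f));		/* 0x1 (random) */
	printf("copyonw   = %s\n", (f & UVM_FLAG_COPYONW) ? "yes" : "no");
	return 0;
}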
+ */ + +struct uvmexp { + /* vm_page constants */ + int pagesize; /* size of a page (PAGE_SIZE): must be power of 2 */ + int pagemask; /* page mask */ + int pageshift; /* page shift */ + + /* vm_page counters */ + int npages; /* number of pages we manage */ + int free; /* number of free pages */ + int active; /* number of active pages */ + int inactive; /* number of pages that we free'd but may want back */ + int paging; /* number of pages in the process of being paged out */ + int wired; /* number of wired pages */ + int reserve_pagedaemon; /* number of pages reserved for pagedaemon */ + int reserve_kernel; /* number of pages reserved for kernel */ + + /* pageout params */ + int freemin; /* min number of free pages */ + int freetarg; /* target number of free pages */ + int inactarg; /* target number of inactive pages */ + int wiredmax; /* max number of wired pages */ + + /* swap */ + int nswapdev; /* number of configured swap devices in system */ + int swpages; /* number of PAGE_SIZE'ed swap pages */ + int swpginuse; /* number of swap pages in use */ + int nswget; /* number of times fault calls uvm_swap_get() */ + int nanon; /* number total of anon's in system */ + int nfreeanon; /* number of free anon's */ + + /* stat counters */ + int faults; /* page fault count */ + int traps; /* trap count */ + int intrs; /* interrupt count */ + int swtch; /* context switch count */ + int softs; /* software interrupt count */ + int syscalls; /* system calls */ + int pageins; /* pagein operation count */ + /* pageouts are in pdpageouts below */ + int swapins; /* swapins */ + int swapouts; /* swapouts */ + int pgswapin; /* pages swapped in */ + int pgswapout; /* pages swapped out */ + int forks; /* forks */ + int forks_ppwait; /* forks where parent waits */ + int forks_sharevm; /* forks where vmspace is shared */ + + /* fault subcounters */ + int fltnoram; /* number of times fault was out of ram */ + int fltnoanon; /* number of times fault was out of anons */ + int fltpgwait; /* number of times fault had to wait on a page */ + int fltpgrele; /* number of times fault found a released page */ + int fltrelck; /* number of times fault relock called */ + int fltrelckok; /* number of times fault relock is a success */ + int fltanget; /* number of times fault gets anon page */ + int fltanretry; /* number of times fault retrys an anon get */ + int fltamcopy; /* number of times fault clears "needs copy" */ + int fltnamap; /* number of times fault maps a neighbor anon page */ + int fltnomap; /* number of times fault maps a neighbor obj page */ + int fltlget; /* number of times fault does a locked pgo_get */ + int fltget; /* number of times fault does an unlocked get */ + int flt_anon; /* number of times fault anon (case 1a) */ + int flt_acow; /* number of times fault anon cow (case 1b) */ + int flt_obj; /* number of times fault is on object page (2a) */ + int flt_prcopy; /* number of times fault promotes with copy (2b) */ + int flt_przero; /* number of times fault promotes with zerofill (2b) */ + + /* daemon counters */ + int pdwoke; /* number of times daemon woke up */ + int pdrevs; /* number of times daemon rev'd clock hand */ + int pdswout; /* number of times daemon called for swapout */ + int pdfreed; /* number of pages daemon freed since boot */ + int pdscans; /* number of pages daemon scaned since boot */ + int pdanscan; /* number of anonymous pages scanned by daemon */ + int pdobscan; /* number of object pages scanned by daemon */ + int pdreact; /* number of pages daemon reactivated since boot */ + int 
pdbusy; /* number of times daemon found a busy page */ + int pdpageouts; /* number of times daemon started a pageout */ + int pdpending; /* number of times daemon got a pending pagout */ + int pddeact; /* number of pages daemon deactivates */ + + /* kernel memory objects: managed by uvm_km_kmemalloc() only! */ + struct uvm_object *kmem_object; + struct uvm_object *mb_object; +}; + + +extern struct uvmexp uvmexp; + +/* + * macros + */ + +/* zalloc zeros memory, alloc does not */ +#define uvm_km_zalloc(MAP,SIZE) uvm_km_alloc1(MAP,SIZE,TRUE) +#define uvm_km_alloc(MAP,SIZE) uvm_km_alloc1(MAP,SIZE,FALSE) + +/* + * typedefs + */ + +typedef unsigned int uvm_flag_t; +typedef int vm_fault_t; + +/* uvm_aobj.c */ +struct uvm_object *uao_create __P((vsize_t, int)); +void uao_detach __P((struct uvm_object *)); +void uao_reference __P((struct uvm_object *)); + +/* uvm_fault.c */ +int uvm_fault __P((vm_map_t, vaddr_t, + vm_fault_t, vm_prot_t)); + /* handle a page fault */ + +/* uvm_glue.c */ +#if defined(KGDB) +void uvm_chgkprot __P((caddr_t, size_t, int)); +#endif +void uvm_fork __P((struct proc *, struct proc *, boolean_t)); +void uvm_exit __P((struct proc *)); +void uvm_init_limits __P((struct proc *)); +boolean_t uvm_kernacc __P((caddr_t, size_t, int)); +__dead void uvm_scheduler __P((void)) __attribute__((noreturn)); +void uvm_swapin __P((struct proc *)); +boolean_t uvm_useracc __P((caddr_t, size_t, int)); +void uvm_vslock __P((struct proc *, caddr_t, size_t)); +void uvm_vsunlock __P((struct proc *, caddr_t, size_t)); + + +/* uvm_init.c */ +void uvm_init __P((void)); + /* init the uvm system */ + +/* uvm_io.c */ +int uvm_io __P((vm_map_t, struct uio *)); + +/* uvm_km.c */ +vaddr_t uvm_km_alloc1 __P((vm_map_t, vsize_t, boolean_t)); +void uvm_km_free __P((vm_map_t, vaddr_t, vsize_t)); +void uvm_km_free_wakeup __P((vm_map_t, vaddr_t, + vsize_t)); +vaddr_t uvm_km_kmemalloc __P((vm_map_t, struct uvm_object *, + vsize_t, int)); +struct vm_map *uvm_km_suballoc __P((vm_map_t, vaddr_t *, + vaddr_t *, vsize_t, boolean_t, + boolean_t, vm_map_t)); +vaddr_t uvm_km_valloc __P((vm_map_t, vsize_t)); +vaddr_t uvm_km_valloc_wait __P((vm_map_t, vsize_t)); +vaddr_t uvm_km_alloc_poolpage1 __P((vm_map_t, + struct uvm_object *, boolean_t)); +void uvm_km_free_poolpage1 __P((vm_map_t, vaddr_t)); + +#define uvm_km_alloc_poolpage(waitok) uvm_km_alloc_poolpage1(kmem_map, \ + uvmexp.kmem_object, (waitok)) +#define uvm_km_free_poolpage(addr) uvm_km_free_poolpage1(kmem_map, (addr)) + +/* uvm_map.c */ +int uvm_map __P((vm_map_t, vaddr_t *, vsize_t, + struct uvm_object *, vaddr_t, uvm_flag_t)); +int uvm_map_pageable __P((vm_map_t, vaddr_t, + vaddr_t, boolean_t)); +boolean_t uvm_map_checkprot __P((vm_map_t, vaddr_t, + vaddr_t, vm_prot_t)); +int uvm_map_protect __P((vm_map_t, vaddr_t, + vaddr_t, vm_prot_t, boolean_t)); +struct vmspace *uvmspace_alloc __P((vaddr_t, vaddr_t, + boolean_t)); +void uvmspace_init __P((struct vmspace *, struct pmap *, + vaddr_t, vaddr_t, boolean_t)); +void uvmspace_exec __P((struct proc *)); +struct vmspace *uvmspace_fork __P((struct vmspace *)); +void uvmspace_free __P((struct vmspace *)); +void uvmspace_share __P((struct proc *, struct proc *)); +void uvmspace_unshare __P((struct proc *)); + + +/* uvm_meter.c */ +void uvm_meter __P((void)); +int uvm_sysctl __P((int *, u_int, void *, size_t *, + void *, size_t, struct proc *)); +void uvm_total __P((struct vmtotal *)); + +/* uvm_mmap.c */ +int uvm_mmap __P((vm_map_t, vaddr_t *, vsize_t, + vm_prot_t, vm_prot_t, int, + caddr_t, vaddr_t)); + +/* uvm_page.c */ 
+struct vm_page *uvm_pagealloc_strat __P((struct uvm_object *, + vaddr_t, struct vm_anon *, int, int)); +#define uvm_pagealloc(obj, off, anon) \ + uvm_pagealloc_strat((obj), (off), (anon), UVM_PGA_STRAT_NORMAL, 0) +void uvm_pagerealloc __P((struct vm_page *, + struct uvm_object *, vaddr_t)); +/* Actually, uvm_page_physload takes PF#s which need their own type */ +void uvm_page_physload __P((vaddr_t, vaddr_t, + vaddr_t, vaddr_t, int)); +void uvm_setpagesize __P((void)); + +/* uvm_pdaemon.c */ +void uvm_pageout __P((void)); + +/* uvm_pglist.c */ +int uvm_pglistalloc __P((psize_t, paddr_t, + paddr_t, paddr_t, paddr_t, + struct pglist *, int, int)); +void uvm_pglistfree __P((struct pglist *)); + +/* uvm_swap.c */ +void uvm_swap_init __P((void)); + +/* uvm_unix.c */ +int uvm_coredump __P((struct proc *, struct vnode *, + struct ucred *, struct core *)); +int uvm_grow __P((struct proc *, vaddr_t)); + +/* uvm_user.c */ +int uvm_deallocate __P((vm_map_t, vaddr_t, vsize_t)); + +/* uvm_vnode.c */ +void uvm_vnp_setsize __P((struct vnode *, u_quad_t)); +void uvm_vnp_sync __P((struct mount *)); +void uvm_vnp_terminate __P((struct vnode *)); + /* terminate a uvm/uvn object */ +boolean_t uvm_vnp_uncache __P((struct vnode *)); +struct uvm_object *uvn_attach __P((void *, vm_prot_t)); + +#endif /* _UVM_UVM_EXTERN_H_ */ + diff --git a/sys/uvm/uvm_fault.c b/sys/uvm/uvm_fault.c new file mode 100644 index 00000000000..10978e8c14e --- /dev/null +++ b/sys/uvm/uvm_fault.c @@ -0,0 +1,1747 @@ +/* $NetBSD: uvm_fault.c,v 1.19 1999/01/24 23:53:15 chuck Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * from: Id: uvm_fault.c,v 1.1.2.23 1998/02/06 05:29:05 chs Exp + */ + +/* + * uvm_fault.c: fault handler + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/proc.h> +#include <sys/malloc.h> +#include <sys/mman.h> +#include <sys/user.h> + +#include <vm/vm.h> +#include <vm/vm_page.h> +#include <vm/vm_kern.h> + +#include <uvm/uvm.h> + +/* + * + * a word on page faults: + * + * types of page faults we handle: + * + * CASE 1: upper layer faults CASE 2: lower layer faults + * + * CASE 1A CASE 1B CASE 2A CASE 2B + * read/write1 write>1 read/write +-cow_write/zero + * | | | | + * +--|--+ +--|--+ +-----+ + | + | +-----+ + * amap | V | | ----------->new| | | | ^ | + * +-----+ +-----+ +-----+ + | + | +--|--+ + * | | | + * +-----+ +-----+ +--|--+ | +--|--+ + * uobj | d/c | | d/c | | V | +----| | + * +-----+ +-----+ +-----+ +-----+ + * + * d/c = don't care + * + * case [0]: layerless fault + * no amap or uobj is present. this is an error. + * + * case [1]: upper layer fault [anon active] + * 1A: [read] or [write with anon->an_ref == 1] + * I/O takes place in top level anon and uobj is not touched. + * 1B: [write with anon->an_ref > 1] + * new anon is alloc'd and data is copied off ["COW"] + * + * case [2]: lower layer fault [uobj] + * 2A: [read on non-NULL uobj] or [write to non-copy_on_write area] + * I/O takes place directly in object. + * 2B: [write to copy_on_write] or [read on NULL uobj] + * data is "promoted" from uobj to a new anon. + * if uobj is null, then we zero fill. + * + * we follow the standard UVM locking protocol ordering: + * + * MAPS => AMAP => UOBJ => ANON => PAGE QUEUES (PQ) + * we hold a PG_BUSY page if we unlock for I/O + * + * + * the code is structured as follows: + * + * - init the "IN" params in the ufi structure + * ReFault: + * - do lookups [locks maps], check protection, handle needs_copy + * - check for case 0 fault (error) + * - establish "range" of fault + * - if we have an amap lock it and extract the anons + * - if sequential advice deactivate pages behind us + * - at the same time check pmap for unmapped areas and anon for pages + * that we could map in (and do map it if found) + * - check object for resident pages that we could map in + * - if (case 2) goto Case2 + * - >>> handle case 1 + * - ensure source anon is resident in RAM + * - if case 1B alloc new anon and copy from source + * - map the correct page in + * Case2: + * - >>> handle case 2 + * - ensure source page is resident (if uobj) + * - if case 2B alloc new anon and copy from source (could be zero + * fill if uobj == NULL) + * - map the correct page in + * - done! + * + * note on paging: + * if we have to do I/O we place a PG_BUSY page in the correct object, + * unlock everything, and do the I/O. when I/O is done we must reverify + * the state of the world before assuming that our data structures are + * valid. [because mappings could change while the map is unlocked] + * + * alternative 1: unbusy the page in question and restart the page fault + * from the top (ReFault). this is easy but does not take advantage + * of the information that we already have from our previous lookup, + * although it is possible that the "hints" in the vm_map will help here. + * + * alternative 2: the system already keeps track of a "version" number of + * a map. [i.e. every time you write-lock a map (e.g. to change a + * mapping) you bump the version number up by one...] so, we can save + * the version number of the map before we release the lock and start I/O. 
+ * then when I/O is done we can relock and check the version numbers + * to see if anything changed. this might save us some over 1 because + * we don't have to unbusy the page and may be less compares(?). + * + * alternative 3: put in backpointers or a way to "hold" part of a map + * in place while I/O is in progress. this could be complex to + * implement (especially with structures like amap that can be referenced + * by multiple map entries, and figuring out what should wait could be + * complex as well...). + * + * given that we are not currently multiprocessor or multithreaded we might + * as well choose alternative 2 now. maybe alternative 3 would be useful + * in the future. XXX keep in mind for future consideration//rechecking. + */ + +/* + * local data structures + */ + +struct uvm_advice { + int advice; + int nback; + int nforw; +}; + +/* + * page range array: + * note: index in array must match "advice" value + * XXX: borrowed numbers from freebsd. do they work well for us? + */ + +static struct uvm_advice uvmadvice[] = { + { MADV_NORMAL, 3, 4 }, + { MADV_RANDOM, 0, 0 }, + { MADV_SEQUENTIAL, 8, 7}, +}; + +#define UVM_MAXRANGE 16 /* must be max() of nback+nforw+1 */ + +/* + * private prototypes + */ + +static void uvmfault_amapcopy __P((struct uvm_faultinfo *)); +static __inline void uvmfault_anonflush __P((struct vm_anon **, int)); + +/* + * inline functions + */ + +/* + * uvmfault_anonflush: try and deactivate pages in specified anons + * + * => does not have to deactivate page if it is busy + */ + +static __inline void +uvmfault_anonflush(anons, n) + struct vm_anon **anons; + int n; +{ + int lcv; + struct vm_page *pg; + + for (lcv = 0 ; lcv < n ; lcv++) { + if (anons[lcv] == NULL) + continue; + simple_lock(&anons[lcv]->an_lock); + pg = anons[lcv]->u.an_page; + if (pg && (pg->flags & PG_BUSY) == 0 && pg->loan_count == 0) { + uvm_lock_pageq(); + if (pg->wire_count == 0) { + pmap_page_protect(PMAP_PGARG(pg), VM_PROT_NONE); + uvm_pagedeactivate(pg); + } + uvm_unlock_pageq(); + } + simple_unlock(&anons[lcv]->an_lock); + } +} + +/* + * normal functions + */ + +/* + * uvmfault_amapcopy: clear "needs_copy" in a map. + * + * => called with VM data structures unlocked (usually, see below) + * => we get a write lock on the maps and clear needs_copy for a VA + * => if we are out of RAM we sleep (waiting for more) + */ + +static void +uvmfault_amapcopy(ufi) + struct uvm_faultinfo *ufi; +{ + + /* + * while we haven't done the job + */ + + while (1) { + + /* + * no mapping? give up. + */ + + if (uvmfault_lookup(ufi, TRUE) == FALSE) + return; + + /* + * copy if needed. + */ + + if (UVM_ET_ISNEEDSCOPY(ufi->entry)) + amap_copy(ufi->map, ufi->entry, M_NOWAIT, TRUE, + ufi->orig_rvaddr, ufi->orig_rvaddr + 1); + + /* + * didn't work? must be out of RAM. unlock and sleep. + */ + + if (UVM_ET_ISNEEDSCOPY(ufi->entry)) { + uvmfault_unlockmaps(ufi, TRUE); + uvm_wait("fltamapcopy"); + continue; + } + + /* + * got it! unlock and return. + */ + + uvmfault_unlockmaps(ufi, TRUE); + return; + } + /*NOTREACHED*/ +} + +/* + * uvmfault_anonget: get data in an anon into a non-busy, non-released + * page in that anon. + * + * => maps, amap, and anon locked by caller. + * => if we fail (result != VM_PAGER_OK) we unlock everything. + * => if we are successful, we return with everything still locked. + * => we don't move the page on the queues [gets moved later] + * => if we allocate a new page [we_own], it gets put on the queues. 
+ * either way, the result is that the page is on the queues at return time + * => for pages which are on loan from a uvm_object (and thus are not + * owned by the anon): if successful, we return with the owning object + * locked. the caller must unlock this object when it unlocks everything + * else. + */ + +int uvmfault_anonget(ufi, amap, anon) + struct uvm_faultinfo *ufi; + struct vm_amap *amap; + struct vm_anon *anon; +{ + boolean_t we_own; /* we own anon's page? */ + boolean_t locked; /* did we relock? */ + struct vm_page *pg; + int result; + UVMHIST_FUNC("uvmfault_anonget"); UVMHIST_CALLED(maphist); + + result = 0; /* XXX shut up gcc */ + uvmexp.fltanget++; + /* bump rusage counters */ + if (anon->u.an_page) + curproc->p_addr->u_stats.p_ru.ru_minflt++; + else + curproc->p_addr->u_stats.p_ru.ru_majflt++; + + /* + * loop until we get it, or fail. + */ + + while (1) { + + we_own = FALSE; /* TRUE if we set PG_BUSY on a page */ + pg = anon->u.an_page; + + /* + * if there is a resident page and it is loaned, then anon + * may not own it. call out to uvm_anon_lockpage() to ensure + * the real owner of the page has been identified and locked. + */ + + if (pg && pg->loan_count) + pg = uvm_anon_lockloanpg(anon); + + /* + * page there? make sure it is not busy/released. + */ + + if (pg) { + + /* + * at this point, if the page has a uobject [meaning + * we have it on loan], then that uobject is locked + * by us! if the page is busy, we drop all the + * locks (including uobject) and try again. + */ + + if ((pg->flags & (PG_BUSY|PG_RELEASED)) == 0) { + UVMHIST_LOG(maphist, "<- OK",0,0,0,0); + return (VM_PAGER_OK); + } + pg->flags |= PG_WANTED; + uvmexp.fltpgwait++; + + /* + * the last unlock must be an atomic unlock+wait on + * the owner of page + */ + if (pg->uobject) { /* owner is uobject ? */ + uvmfault_unlockall(ufi, amap, NULL, anon); + UVMHIST_LOG(maphist, " unlock+wait on uobj",0, + 0,0,0); + UVM_UNLOCK_AND_WAIT(pg, + &pg->uobject->vmobjlock, + FALSE, "anonget1",0); + } else { + /* anon owns page */ + uvmfault_unlockall(ufi, amap, NULL, NULL); + UVMHIST_LOG(maphist, " unlock+wait on anon",0, + 0,0,0); + UVM_UNLOCK_AND_WAIT(pg,&anon->an_lock,0, + "anonget2",0); + } + /* ready to relock and try again */ + + } else { + + /* + * no page, we must try and bring it in. + */ + pg = uvm_pagealloc(NULL, 0, anon); + + if (pg == NULL) { /* out of RAM. */ + + uvmfault_unlockall(ufi, amap, NULL, anon); + uvmexp.fltnoram++; + UVMHIST_LOG(maphist, " noram -- UVM_WAIT",0, + 0,0,0); + uvm_wait("flt_noram1"); + /* ready to relock and try again */ + + } else { + + /* we set the PG_BUSY bit */ + we_own = TRUE; + uvmfault_unlockall(ufi, amap, NULL, anon); + + /* + * we are passing a PG_BUSY+PG_FAKE+PG_CLEAN + * page into the uvm_swap_get function with + * all data structures unlocked. note that + * it is ok to read an_swslot here because + * we hold PG_BUSY on the page. + */ + uvmexp.pageins++; + result = uvm_swap_get(pg, anon->an_swslot, + PGO_SYNCIO); + + /* + * we clean up after the i/o below in the + * "we_own" case + */ + /* ready to relock and try again */ + } + } + + /* + * now relock and try again + */ + + locked = uvmfault_relock(ufi); + if (locked) { + amap_lock(amap); + } + if (locked || we_own) + simple_lock(&anon->an_lock); + + /* + * if we own the page (i.e. we set PG_BUSY), then we need + * to clean up after the I/O. there are three cases to + * consider: + * [1] page released during I/O: free anon and ReFault. + * [2] I/O not OK. free the page and cause the fault + * to fail. + * [3] I/O OK! 
activate the page and sync with the + * non-we_own case (i.e. drop anon lock if not locked). + */ + + if (we_own) { + + if (pg->flags & PG_WANTED) { + /* still holding object lock */ + thread_wakeup(pg); + } + /* un-busy! */ + pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE); + UVM_PAGE_OWN(pg, NULL); + + /* + * if we were RELEASED during I/O, then our anon is + * no longer part of an amap. we need to free the + * anon and try again. + */ + if (pg->flags & PG_RELEASED) { + pmap_page_protect(PMAP_PGARG(pg), + VM_PROT_NONE); /* to be safe */ + simple_unlock(&anon->an_lock); + uvm_anfree(anon); /* frees page for us */ + if (locked) + uvmfault_unlockall(ufi, amap, NULL, NULL); + uvmexp.fltpgrele++; + UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0); + return (VM_PAGER_REFAULT); /* refault! */ + } + + if (result != VM_PAGER_OK) { +#ifdef DIAGNOSTIC + if (result == VM_PAGER_PEND) + panic("uvmfault_anonget: got PENDING for non-async I/O"); +#endif + /* remove page from anon */ + anon->u.an_page = NULL; + + /* + * note: page was never !PG_BUSY, so it + * can't be mapped and thus no need to + * pmap_page_protect it... + */ + uvm_lock_pageq(); + uvm_pagefree(pg); + uvm_unlock_pageq(); + + if (locked) + uvmfault_unlockall(ufi, amap, NULL, + anon); + else + simple_unlock(&anon->an_lock); + UVMHIST_LOG(maphist, "<- ERROR", 0,0,0,0); + return (VM_PAGER_ERROR); + } + + /* + * must be OK, clear modify (already PG_CLEAN) + * and activate + */ + pmap_clear_modify(PMAP_PGARG(pg)); + uvm_lock_pageq(); + uvm_pageactivate(pg); + uvm_unlock_pageq(); + if (!locked) + simple_unlock(&anon->an_lock); + } + + /* + * we were not able to relock. restart fault. + */ + + if (!locked) { + UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0); + return (VM_PAGER_REFAULT); + } + + /* + * verify no one has touched the amap and moved the anon on us. + */ + + if (amap_lookup(&ufi->entry->aref, + ufi->orig_rvaddr - ufi->entry->start) != anon) { + + uvmfault_unlockall(ufi, amap, NULL, anon); + UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0); + return (VM_PAGER_REFAULT); + } + + /* + * try it again! + */ + + uvmexp.fltanretry++; + continue; + + } /* while (1) */ + + /*NOTREACHED*/ +} + +/* + * F A U L T - m a i n e n t r y p o i n t + */ + +/* + * uvm_fault: page fault handler + * + * => called from MD code to resolve a page fault + * => VM data structures usually should be unlocked. however, it is + * possible to call here with the main map locked if the caller + * gets a write lock, sets it recusive, and then calls us (c.f. + * uvm_map_pageable). this should be avoided because it keeps + * the map locked off during I/O. + */ + +int +uvm_fault(orig_map, vaddr, fault_type, access_type) + vm_map_t orig_map; + vaddr_t vaddr; + vm_fault_t fault_type; + vm_prot_t access_type; +{ + struct uvm_faultinfo ufi; + vm_prot_t enter_prot; + boolean_t wired, narrow, promote, locked, shadowed; + int npages, nback, nforw, centeridx, result, lcv, gotpages; + vaddr_t startva, objaddr, currva, offset; + paddr_t pa; + struct vm_amap *amap; + struct uvm_object *uobj; + struct vm_anon *anons_store[UVM_MAXRANGE], **anons, *anon, *oanon; + struct vm_page *pages[UVM_MAXRANGE], *pg, *uobjpage; + UVMHIST_FUNC("uvm_fault"); UVMHIST_CALLED(maphist); + + UVMHIST_LOG(maphist, "(map=0x%x, vaddr=0x%x, ft=%d, at=%d)", + orig_map, vaddr, fault_type, access_type); + + anon = NULL; /* XXX: shut up gcc */ + + uvmexp.faults++; /* XXX: locking? 
*/ + + /* + * init the IN parameters in the ufi + */ + + ufi.orig_map = orig_map; + ufi.orig_rvaddr = trunc_page(vaddr); + ufi.orig_size = PAGE_SIZE; /* can't get any smaller than this */ + if (fault_type == VM_FAULT_WIRE) + narrow = TRUE; /* don't look for neighborhood + * pages on wire */ + else + narrow = FALSE; /* normal fault */ + + /* + * "goto ReFault" means restart the page fault from ground zero. + */ +ReFault: + + /* + * lookup and lock the maps + */ + + if (uvmfault_lookup(&ufi, FALSE) == FALSE) { + UVMHIST_LOG(maphist, "<- no mapping @ 0x%x", vaddr, 0,0,0); + return (KERN_INVALID_ADDRESS); + } + /* locked: maps(read) */ + + /* + * check protection + */ + + if ((ufi.entry->protection & access_type) != access_type) { + UVMHIST_LOG(maphist, + "<- protection failure (prot=0x%x, access=0x%x)", + ufi.entry->protection, access_type, 0, 0); + uvmfault_unlockmaps(&ufi, FALSE); + return (KERN_PROTECTION_FAILURE); + } + + /* + * "enter_prot" is the protection we want to enter the page in at. + * for certain pages (e.g. copy-on-write pages) this protection can + * be more strict than ufi.entry->protection. "wired" means either + * the entry is wired or we are fault-wiring the pg. + */ + + enter_prot = ufi.entry->protection; + wired = (ufi.entry->wired_count != 0) || (fault_type == VM_FAULT_WIRE); + if (wired) + access_type = enter_prot; /* full access for wired */ + + /* + * handle "needs_copy" case. if we need to copy the amap we will + * have to drop our readlock and relock it with a write lock. (we + * need a write lock to change anything in a map entry [e.g. + * needs_copy]). + */ + + if (UVM_ET_ISNEEDSCOPY(ufi.entry)) { + if ((access_type & VM_PROT_WRITE) || + (ufi.entry->object.uvm_obj == NULL)) { + /* need to clear */ + UVMHIST_LOG(maphist, + " need to clear needs_copy and refault",0,0,0,0); + uvmfault_unlockmaps(&ufi, FALSE); + uvmfault_amapcopy(&ufi); + uvmexp.fltamcopy++; + goto ReFault; + + } else { + + /* + * ensure that we pmap_enter page R/O since + * needs_copy is still true + */ + enter_prot = enter_prot & ~VM_PROT_WRITE; + + } + } + + /* + * identify the players + */ + + amap = ufi.entry->aref.ar_amap; /* top layer */ + uobj = ufi.entry->object.uvm_obj; /* bottom layer */ + + /* + * check for a case 0 fault. if nothing backing the entry then + * error now. + */ + + if (amap == NULL && uobj == NULL) { + uvmfault_unlockmaps(&ufi, FALSE); + UVMHIST_LOG(maphist,"<- no backing store, no overlay",0,0,0,0); + return (KERN_INVALID_ADDRESS); + } + + /* + * establish range of interest based on advice from mapper + * and then clip to fit map entry. note that we only want + * to do this the first time through the fault. if we + * ReFault we will disable this by setting "narrow" to true. + */ + + if (narrow == FALSE) { + + /* wide fault (!narrow) */ +#ifdef DIAGNOSTIC + if (uvmadvice[ufi.entry->advice].advice != ufi.entry->advice) + panic("fault: advice mismatch!"); +#endif + nback = min(uvmadvice[ufi.entry->advice].nback, + (ufi.orig_rvaddr - ufi.entry->start) >> PAGE_SHIFT); + startva = ufi.orig_rvaddr - (nback << PAGE_SHIFT); + nforw = min(uvmadvice[ufi.entry->advice].nforw, + ((ufi.entry->end - ufi.orig_rvaddr) >> + PAGE_SHIFT) - 1); + /* + * note: "-1" because we don't want to count the + * faulting page as forw + */ + npages = nback + nforw + 1; + centeridx = nback; + + narrow = FALSE; /* ensure only once per-fault */ + + } else { + + /* narrow fault! 
*/ + nback = nforw = 0; + startva = ufi.orig_rvaddr; + npages = 1; + centeridx = 0; + + } + + /* locked: maps(read) */ + UVMHIST_LOG(maphist, " narrow=%d, back=%d, forw=%d, startva=0x%x", + narrow, nback, nforw, startva); + UVMHIST_LOG(maphist, " entry=0x%x, amap=0x%x, obj=0x%x", ufi.entry, + amap, uobj, 0); + + /* + * if we've got an amap, lock it and extract current anons. + */ + + if (amap) { + amap_lock(amap); + anons = anons_store; + amap_lookups(&ufi.entry->aref, startva - ufi.entry->start, + anons, npages); + } else { + anons = NULL; /* to be safe */ + } + + /* locked: maps(read), amap(if there) */ + + /* + * for MADV_SEQUENTIAL mappings we want to deactivate the back pages + * now and then forget about them (for the rest of the fault). + */ + + if (ufi.entry->advice == MADV_SEQUENTIAL) { + + UVMHIST_LOG(maphist, " MADV_SEQUENTIAL: flushing backpages", + 0,0,0,0); + /* flush back-page anons? */ + if (amap) + uvmfault_anonflush(anons, nback); + + /* flush object? */ + if (uobj) { + objaddr = + (startva - ufi.entry->start) + ufi.entry->offset; + simple_lock(&uobj->vmobjlock); + (void) uobj->pgops->pgo_flush(uobj, objaddr, objaddr + + (nback << PAGE_SHIFT), PGO_DEACTIVATE); + simple_unlock(&uobj->vmobjlock); + } + + /* now forget about the backpages */ + if (amap) + anons += nback; + startva = startva + (nback << PAGE_SHIFT); + npages -= nback; + nback = centeridx = 0; + } + + /* locked: maps(read), amap(if there) */ + + /* + * map in the backpages and frontpages we found in the amap in hopes + * of preventing future faults. we also init the pages[] array as + * we go. + */ + + currva = startva; + shadowed = FALSE; + for (lcv = 0 ; lcv < npages ; lcv++, currva += PAGE_SIZE) { + + /* + * dont play with VAs that are already mapped + * except for center) + * XXX: return value of pmap_extract disallows PA 0 + */ + if (lcv != centeridx) { + pa = pmap_extract(ufi.orig_map->pmap, currva); + if (pa != NULL) { + pages[lcv] = PGO_DONTCARE; + continue; + } + } + + /* + * unmapped or center page. check if any anon at this level. + */ + if (amap == NULL || anons[lcv] == NULL) { + pages[lcv] = NULL; + continue; + } + + /* + * check for present page and map if possible. re-activate it. + */ + + pages[lcv] = PGO_DONTCARE; + if (lcv == centeridx) { /* save center for later! */ + shadowed = TRUE; + continue; + } + anon = anons[lcv]; + simple_lock(&anon->an_lock); + /* ignore loaned pages */ + if (anon->u.an_page && anon->u.an_page->loan_count == 0 && + (anon->u.an_page->flags & (PG_RELEASED|PG_BUSY)) == 0) { + uvm_lock_pageq(); + uvm_pageactivate(anon->u.an_page); /* reactivate */ + uvm_unlock_pageq(); + UVMHIST_LOG(maphist, + " MAPPING: n anon: pm=0x%x, va=0x%x, pg=0x%x", + ufi.orig_map->pmap, currva, anon->u.an_page, 0); + uvmexp.fltnamap++; + pmap_enter(ufi.orig_map->pmap, currva, + VM_PAGE_TO_PHYS(anon->u.an_page), + (anon->an_ref > 1) ? VM_PROT_READ : enter_prot, + (ufi.entry->wired_count != 0)); + } + simple_unlock(&anon->an_lock); + } + + /* locked: maps(read), amap(if there) */ + /* (shadowed == TRUE) if there is an anon at the faulting address */ + UVMHIST_LOG(maphist, " shadowed=%d, will_get=%d", shadowed, + (uobj && shadowed == FALSE),0,0); + + /* + * note that if we are really short of RAM we could sleep in the above + * call to pmap_enter with everything locked. bad? 
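Editor's aside on the "wide fault" range computation above (not part of the imported diff): the per-advice lookbehind/lookahead from uvmadvice[] is clipped against the map entry so the fault never reaches outside it, and the faulting page itself is not counted as a forward page. The sketch below redoes that arithmetic with made-up addresses.

/*
 * Editor's sketch: computing nback/nforw/startva/npages/centeridx as in
 * the wide-fault branch of uvm_fault() above.  Addresses, the MIN macro
 * and main() are illustrative only.
 */
#include <stdio.h>

#define PAGE_SHIFT 12
#define MIN(a, b)  ((a) < (b) ? (a) : (b))

struct uvm_advice { int advice, nback, nforw; };

/* index must match the MADV_* value, as in uvmadvice[] above */
static const struct uvm_advice uvmadvice[] = {
	{ 0 /* MADV_NORMAL */,     3, 4 },
	{ 1 /* MADV_RANDOM */,     0, 0 },
	{ 2 /* MADV_SEQUENTIAL */, 8, 7 },
};

int
main(void)
{
	unsigned long entry_start = 0x20000, entry_end = 0x26000;
	unsigned long rvaddr = 0x21000;		/* faulting page, rounded */
	int advice = 0;				/* MADV_NORMAL */

	long nback = MIN(uvmadvice[advice].nback,
	    (long)((rvaddr - entry_start) >> PAGE_SHIFT));
	long nforw = MIN(uvmadvice[advice].nforw,
	    (long)(((entry_end - rvaddr) >> PAGE_SHIFT) - 1));
	unsigned long startva = rvaddr - ((unsigned long)nback << PAGE_SHIFT);
	long npages = nback + nforw + 1;
	long centeridx = nback;

	/* prints: startva=0x20000 nback=1 nforw=4 npages=6 centeridx=1 */
	printf("startva=0x%lx nback=%ld nforw=%ld npages=%ld centeridx=%ld\n",
	    startva, nback, nforw, npages, centeridx);
	return 0;
}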
+ * XXXCDC: this is fixed in PMAP_NEW (no sleep alloc's in pmap) + */ + + /* + * if the desired page is not shadowed by the amap and we have a + * backing object, then we check to see if the backing object would + * prefer to handle the fault itself (rather than letting us do it + * with the usual pgo_get hook). the backing object signals this by + * providing a pgo_fault routine. + */ + + if (uobj && shadowed == FALSE && uobj->pgops->pgo_fault != NULL) { + + simple_lock(&uobj->vmobjlock); + + /* locked: maps(read), amap (if there), uobj */ + result = uobj->pgops->pgo_fault(&ufi, startva, pages, npages, + centeridx, fault_type, access_type, + PGO_LOCKED); + /* locked: nothing, pgo_fault has unlocked everything */ + + if (result == VM_PAGER_OK) + return (KERN_SUCCESS); /* pgo_fault did pmap enter */ + else if (result == VM_PAGER_REFAULT) + goto ReFault; /* try again! */ + else + return (KERN_PROTECTION_FAILURE); + } + + /* + * now, if the desired page is not shadowed by the amap and we have + * a backing object that does not have a special fault routine, then + * we ask (with pgo_get) the object for resident pages that we care + * about and attempt to map them in. we do not let pgo_get block + * (PGO_LOCKED). + * + * ("get" has the option of doing a pmap_enter for us) + */ + + if (uobj && shadowed == FALSE) { + simple_lock(&uobj->vmobjlock); + + /* locked (!shadowed): maps(read), amap (if there), uobj */ + /* + * the following call to pgo_get does _not_ change locking state + */ + + uvmexp.fltlget++; + gotpages = npages; + result = uobj->pgops->pgo_get(uobj, ufi.entry->offset + + (startva - ufi.entry->start), + pages, &gotpages, centeridx, + UVM_ET_ISCOPYONWRITE(ufi.entry) ? + VM_PROT_READ : access_type, + ufi.entry->advice, PGO_LOCKED); + + /* + * check for pages to map, if we got any + */ + + uobjpage = NULL; + + if (gotpages) { + currva = startva; + for (lcv = 0 ; lcv < npages ; + lcv++, currva += PAGE_SIZE) { + + if (pages[lcv] == NULL || + pages[lcv] == PGO_DONTCARE) + continue; + +#ifdef DIAGNOSTIC + /* + * pager sanity check: pgo_get with + * PGO_LOCKED should never return a + * released page to us. + */ + if (pages[lcv]->flags & PG_RELEASED) + panic("uvm_fault: pgo_get PGO_LOCKED gave us a RELEASED page"); +#endif + + /* + * if center page is resident and not + * PG_BUSY|PG_RELEASED then pgo_get + * made it PG_BUSY for us and gave + * us a handle to it. remember this + * page as "uobjpage." (for later use). + */ + + if (lcv == centeridx) { + uobjpage = pages[lcv]; + UVMHIST_LOG(maphist, " got uobjpage (0x%x) with locked get", + uobjpage, 0,0,0); + continue; + } + + /* + * note: calling pgo_get with locked data + * structures returns us pages which are + * neither busy nor released, so we don't + * need to check for this. we can just + * directly enter the page (after moving it + * to the head of the active queue [useful?]). + */ + + uvm_lock_pageq(); + uvm_pageactivate(pages[lcv]); /* reactivate */ + uvm_unlock_pageq(); + UVMHIST_LOG(maphist, + " MAPPING: n obj: pm=0x%x, va=0x%x, pg=0x%x", + ufi.orig_map->pmap, currva, pages[lcv], 0); + uvmexp.fltnomap++; + pmap_enter(ufi.orig_map->pmap, currva, + VM_PAGE_TO_PHYS(pages[lcv]), + UVM_ET_ISCOPYONWRITE(ufi.entry) ? + VM_PROT_READ : enter_prot, wired); + + /* + * NOTE: page can't be PG_WANTED or PG_RELEASED + * because we've held the lock the whole time + * we've had the handle. + */ + pages[lcv]->flags &= ~(PG_BUSY); /* un-busy! */ + UVM_PAGE_OWN(pages[lcv], NULL); + + /* done! 
*/ + } /* for "lcv" loop */ + } /* "gotpages" != 0 */ + + /* note: object still _locked_ */ + } else { + + uobjpage = NULL; + + } + + /* locked (shadowed): maps(read), amap */ + /* locked (!shadowed): maps(read), amap(if there), + uobj(if !null), uobjpage(if !null) */ + + /* + * note that at this point we are done with any front or back pages. + * we are now going to focus on the center page (i.e. the one we've + * faulted on). if we have faulted on the top (anon) layer + * [i.e. case 1], then the anon we want is anons[centeridx] (we have + * not touched it yet). if we have faulted on the bottom (uobj) + * layer [i.e. case 2] and the page was both present and available, + * then we've got a pointer to it as "uobjpage" and we've already + * made it BUSY. + */ + + /* + * there are four possible cases we must address: 1A, 1B, 2A, and 2B + */ + + /* + * redirect case 2: if we are not shadowed, go to case 2. + */ + + if (shadowed == FALSE) + goto Case2; + + /* locked: maps(read), amap */ + + /* + * handle case 1: fault on an anon in our amap + */ + + anon = anons[centeridx]; + UVMHIST_LOG(maphist, " case 1 fault: anon=0x%x", anon, 0,0,0); + simple_lock(&anon->an_lock); + + /* locked: maps(read), amap, anon */ + + /* + * no matter if we have case 1A or case 1B we are going to need to + * have the anon's memory resident. ensure that now. + */ + + /* + * let uvmfault_anonget do the dirty work. if it fails (!OK) it will + * unlock for us. if it is OK, locks are still valid and locked. + * also, if it is OK, then the anon's page is on the queues. + * if the page is on loan from a uvm_object, then anonget will + * lock that object for us if it does not fail. + */ + + result = uvmfault_anonget(&ufi, amap, anon); + + if (result == VM_PAGER_REFAULT) + goto ReFault; + + if (result == VM_PAGER_AGAIN) { + tsleep((caddr_t)&lbolt, PVM, "fltagain1", 0); + goto ReFault; + } + + if (result != VM_PAGER_OK) + return (KERN_PROTECTION_FAILURE); /* XXX??? */ + + /* + * uobj is non null if the page is on loan from an object (i.e. uobj) + */ + + uobj = anon->u.an_page->uobject; /* locked by anonget if !NULL */ + + /* locked: maps(read), amap, anon, uobj(if one) */ + + /* + * special handling for loaned pages + */ + if (anon->u.an_page->loan_count) { + + if ((access_type & VM_PROT_WRITE) == 0) { + + /* + * for read faults on loaned pages we just cap the + * protection at read-only. + */ + + enter_prot = enter_prot & ~VM_PROT_WRITE; + + } else { + /* + * note that we can't allow writes into a loaned page! + * + * if we have a write fault on a loaned page in an + * anon then we need to look at the anon's ref count. + * if it is greater than one then we are going to do + * a normal copy-on-write fault into a new anon (this + * is not a problem). however, if the reference count + * is one (a case where we would normally allow a + * write directly to the page) then we need to kill + * the loan before we continue. 
+ */ + + /* >1 case is already ok */ + if (anon->an_ref == 1) { + + /* get new un-owned replacement page */ + pg = uvm_pagealloc(NULL, 0, NULL); + if (pg == NULL) { + uvmfault_unlockall(&ufi, amap, uobj, + anon); + uvm_wait("flt_noram2"); + goto ReFault; + } + + /* + * copy data, kill loan, and drop uobj lock + * (if any) + */ + /* copy old -> new */ + uvm_pagecopy(anon->u.an_page, pg); + + /* force reload */ + pmap_page_protect(PMAP_PGARG(anon->u.an_page), + VM_PROT_NONE); + uvm_lock_pageq(); /* KILL loan */ + if (uobj) + /* if we were loaning */ + anon->u.an_page->loan_count--; + anon->u.an_page->uanon = NULL; + /* in case we owned */ + anon->u.an_page->pqflags &= ~PQ_ANON; + uvm_unlock_pageq(); + if (uobj) { + simple_unlock(&uobj->vmobjlock); + uobj = NULL; + } + + /* install new page in anon */ + anon->u.an_page = pg; + pg->uanon = anon; + pg->pqflags |= PQ_ANON; + pg->flags &= ~(PG_BUSY|PG_FAKE); + UVM_PAGE_OWN(pg, NULL); + + /* done! */ + } /* ref == 1 */ + } /* write fault */ + } /* loan count */ + + /* + * if we are case 1B then we will need to allocate a new blank + * anon to transfer the data into. note that we have a lock + * on anon, so no one can busy or release the page until we are done. + * also note that the ref count can't drop to zero here because + * it is > 1 and we are only dropping one ref. + * + * in the (hopefully very rare) case that we are out of RAM we + * will unlock, wait for more RAM, and refault. + * + * if we are out of anon VM we kill the process (XXX: could wait?). + */ + + if ((access_type & VM_PROT_WRITE) != 0 && anon->an_ref > 1) { + + UVMHIST_LOG(maphist, " case 1B: COW fault",0,0,0,0); + uvmexp.flt_acow++; + oanon = anon; /* oanon = old, locked anon */ + anon = uvm_analloc(); + if (anon) + pg = uvm_pagealloc(NULL, 0, anon); +#ifdef __GNUC__ + else + pg = NULL; /* XXX: gcc */ +#endif + + /* check for out of RAM */ + if (anon == NULL || pg == NULL) { + if (anon) + uvm_anfree(anon); + uvmfault_unlockall(&ufi, amap, uobj, oanon); + if (anon == NULL) { + UVMHIST_LOG(maphist, + "<- failed. out of VM",0,0,0,0); + uvmexp.fltnoanon++; + /* XXX: OUT OF VM, ??? */ + return (KERN_RESOURCE_SHORTAGE); + } + uvmexp.fltnoram++; + uvm_wait("flt_noram3"); /* out of RAM, wait for more */ + goto ReFault; + } + + /* got all resources, replace anon with nanon */ + + uvm_pagecopy(oanon->u.an_page, pg); /* pg now !PG_CLEAN */ + pg->flags &= ~(PG_BUSY|PG_FAKE); /* un-busy! new page */ + UVM_PAGE_OWN(pg, NULL); + amap_add(&ufi.entry->aref, ufi.orig_rvaddr - ufi.entry->start, + anon, 1); + + /* deref: can not drop to zero here by defn! */ + oanon->an_ref--; + + /* + * note: oanon still locked. anon is _not_ locked, but we + * have the sole references to in from amap which _is_ locked. + * thus, no one can get at it until we are done with it. + */ + + } else { + + uvmexp.flt_anon++; + oanon = anon; /* old, locked anon is same as anon */ + pg = anon->u.an_page; + if (anon->an_ref > 1) /* disallow writes to ref > 1 anons */ + enter_prot = enter_prot & ~VM_PROT_WRITE; + + } + + /* locked: maps(read), amap, anon */ + + /* + * now map the page in ... + * XXX: old fault unlocks object before pmap_enter. this seems + * suspect since some other thread could blast the page out from + * under us between the unlock and the pmap_enter. + */ + + UVMHIST_LOG(maphist, " MAPPING: anon: pm=0x%x, va=0x%x, pg=0x%x", + ufi.orig_map->pmap, ufi.orig_rvaddr, pg, 0); + pmap_enter(ufi.orig_map->pmap, ufi.orig_rvaddr, VM_PAGE_TO_PHYS(pg), + enter_prot, wired); + + /* + * ... and update the page queues. 
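Editor's aside on the case 1 handling above (not part of the imported diff): a write fault on an anon with more than one reference takes the 1B path (allocate a fresh anon, copy the page, drop one reference), while everything else maps the existing anon page, read-only when the anon is still shared. The sketch below captures just that decision; the structs and protection bits are simplified stand-ins.

/*
 * Editor's sketch: the case 1A / case 1B decision in uvm_fault().
 * struct anon, the PROT_* bits and resolve_case1() are illustrative
 * stand-ins, not the kernel's types.
 */
#include <stdio.h>

#define PROT_READ  0x1
#define PROT_WRITE 0x2

struct anon { int an_ref; };

static const char *
resolve_case1(struct anon *anon, int access_type, int *enter_prot)
{
	*enter_prot = PROT_READ | PROT_WRITE;	/* say the entry allows RW */

	if ((access_type & PROT_WRITE) && anon->an_ref > 1)
		return "1B: alloc new anon, uvm_pagecopy, oanon->an_ref--";

	if (anon->an_ref > 1)			/* shared anon, not writing */
		*enter_prot &= ~PROT_WRITE;	/* map it read-only */
	return "1A: map the existing anon page";
}

int
main(void)
{
	struct anon shared = { 2 }, lone = { 1 };
	int prot;

	printf("write, ref=2: %s\n", resolve_case1(&shared, PROT_WRITE, &prot));
	printf("read,  ref=2: %s (prot=0x%x)\n",
	    resolve_case1(&shared, PROT_READ, &prot), prot);
	printf("write, ref=1: %s (prot=0x%x)\n",
	    resolve_case1(&lone, PROT_WRITE, &prot), prot);
	return 0;
}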
+ */ + + uvm_lock_pageq(); + + if (fault_type == VM_FAULT_WIRE) { + uvm_pagewire(pg); + } else { + /* activate it */ + uvm_pageactivate(pg); + + } + + uvm_unlock_pageq(); + + /* + * done case 1! finish up by unlocking everything and returning success + */ + + uvmfault_unlockall(&ufi, amap, uobj, oanon); + return (KERN_SUCCESS); + + +Case2: + /* + * handle case 2: faulting on backing object or zero fill + */ + + /* + * locked: + * maps(read), amap(if there), uobj(if !null), uobjpage(if !null) + */ + + /* + * note that uobjpage can not be PGO_DONTCARE at this point. we now + * set uobjpage to PGO_DONTCARE if we are doing a zero fill. if we + * have a backing object, check and see if we are going to promote + * the data up to an anon during the fault. + */ + + if (uobj == NULL) { + uobjpage = PGO_DONTCARE; + promote = TRUE; /* always need anon here */ + } else { + /* assert(uobjpage != PGO_DONTCARE) */ + promote = (access_type & VM_PROT_WRITE) && + UVM_ET_ISCOPYONWRITE(ufi.entry); + } + UVMHIST_LOG(maphist, " case 2 fault: promote=%d, zfill=%d", + promote, (uobj == NULL), 0,0); + + /* + * if uobjpage is not null then we do not need to do I/O to get the + * uobjpage. + * + * if uobjpage is null, then we need to unlock and ask the pager to + * get the data for us. once we have the data, we need to reverify + * the state the world. we are currently not holding any resources. + */ + + if (uobjpage) { + /* update rusage counters */ + curproc->p_addr->u_stats.p_ru.ru_minflt++; + } else { + /* update rusage counters */ + curproc->p_addr->u_stats.p_ru.ru_majflt++; + + /* locked: maps(read), amap(if there), uobj */ + uvmfault_unlockall(&ufi, amap, NULL, NULL); + /* locked: uobj */ + + uvmexp.fltget++; + gotpages = 1; + result = uobj->pgops->pgo_get(uobj, + (ufi.orig_rvaddr - ufi.entry->start) + ufi.entry->offset, + &uobjpage, &gotpages, 0, + UVM_ET_ISCOPYONWRITE(ufi.entry) ? + VM_PROT_READ : access_type, + ufi.entry->advice, 0); + + /* locked: uobjpage(if result OK) */ + + /* + * recover from I/O + */ + + if (result != VM_PAGER_OK) { + +#ifdef DIAGNOSTIC + if (result == VM_PAGER_PEND) + panic("uvm_fault: pgo_get got PENDing on non-async I/O"); +#endif + + if (result == VM_PAGER_AGAIN) { + UVMHIST_LOG(maphist, " pgo_get says TRY AGAIN!",0,0,0,0); + tsleep((caddr_t)&lbolt, PVM, "fltagain2", 0); + goto ReFault; + } + + UVMHIST_LOG(maphist, "<- pgo_get failed (code %d)", + result, 0,0,0); + return (KERN_PROTECTION_FAILURE); /* XXX i/o error */ + } + + /* locked: uobjpage */ + + /* + * re-verify the state of the world by first trying to relock + * the maps. always relock the object. + */ + + locked = uvmfault_relock(&ufi); + if (locked && amap) + amap_lock(amap); + simple_lock(&uobj->vmobjlock); + + /* locked(locked): maps(read), amap(if !null), uobj, uobjpage */ + /* locked(!locked): uobj, uobjpage */ + + /* + * verify that the page has not be released and re-verify + * that amap slot is still free. if there is a problem, + * we unlock and clean up. + */ + + if ((uobjpage->flags & PG_RELEASED) != 0 || + (locked && amap && + amap_lookup(&ufi.entry->aref, + ufi.orig_rvaddr - ufi.entry->start))) { + if (locked) + uvmfault_unlockall(&ufi, amap, NULL, NULL); + locked = FALSE; + } + + /* + * didn't get the lock? release the page and retry. 
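Editor's aside on the case 2 setup above (not part of the imported diff): data is "promoted" into a new anon either when there is no backing object at all (zero fill) or when a write goes through a copy-on-write entry; a plain read from an object, or a write to a shared mapping, is satisfied directly from the object page. A minimal sketch of that predicate:

/*
 * Editor's sketch: the "promote" decision for case 2 faults.
 * case2_promote() and its arguments are illustrative only.
 */
#include <stdio.h>

#define PROT_WRITE 0x2

static int
case2_promote(int have_uobj, int copy_on_write, int access_type)
{
	if (!have_uobj)
		return 1;	/* no backing object: zero fill, case 2B */
	/* COW write also promotes (case 2B); everything else is case 2A */
	return (access_type & PROT_WRITE) && copy_on_write;
}

int
main(void)
{
	printf("no uobj, read       -> promote=%d\n", case2_promote(0, 0, 0));
	printf("uobj, COW, write    -> promote=%d\n", case2_promote(1, 1, PROT_WRITE));
	printf("uobj, COW, read     -> promote=%d\n", case2_promote(1, 1, 0));
	printf("uobj, shared, write -> promote=%d\n", case2_promote(1, 0, PROT_WRITE));
	return 0;
}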
+ */ + + if (locked == FALSE) { + + UVMHIST_LOG(maphist, + " wasn't able to relock after fault: retry", + 0,0,0,0); + if (uobjpage->flags & PG_WANTED) + /* still holding object lock */ + thread_wakeup(uobjpage); + + if (uobjpage->flags & PG_RELEASED) { + uvmexp.fltpgrele++; +#ifdef DIAGNOSTIC + if (uobj->pgops->pgo_releasepg == NULL) + panic("uvm_fault: object has no releasepg function"); +#endif + /* frees page */ + if (uobj->pgops->pgo_releasepg(uobjpage,NULL)) + /* unlock if still alive */ + simple_unlock(&uobj->vmobjlock); + goto ReFault; + } + + uvm_lock_pageq(); + /* make sure it is in queues */ + uvm_pageactivate(uobjpage); + + uvm_unlock_pageq(); + uobjpage->flags &= ~(PG_BUSY|PG_WANTED); + UVM_PAGE_OWN(uobjpage, NULL); + simple_unlock(&uobj->vmobjlock); + goto ReFault; + + } + + /* + * we have the data in uobjpage which is PG_BUSY and + * !PG_RELEASED. we are holding object lock (so the page + * can't be released on us). + */ + + /* locked: maps(read), amap(if !null), uobj, uobjpage */ + + } + + /* + * locked: + * maps(read), amap(if !null), uobj(if !null), uobjpage(if uobj) + */ + + /* + * notes: + * - at this point uobjpage can not be NULL + * - at this point uobjpage can not be PG_RELEASED (since we checked + * for it above) + * - at this point uobjpage could be PG_WANTED (handle later) + */ + + if (promote == FALSE) { + + /* + * we are not promoting. if the mapping is COW ensure that we + * don't give more access than we should (e.g. when doing a read + * fault on a COPYONWRITE mapping we want to map the COW page in + * R/O even though the entry protection could be R/W). + * + * set "pg" to the page we want to map in (uobjpage, usually) + */ + + uvmexp.flt_obj++; + if (UVM_ET_ISCOPYONWRITE(ufi.entry)) + enter_prot = enter_prot & ~VM_PROT_WRITE; + pg = uobjpage; /* map in the actual object */ + + /* assert(uobjpage != PGO_DONTCARE) */ + + /* + * we are faulting directly on the page. be careful + * about writing to loaned pages... + */ + if (uobjpage->loan_count) { + + if ((access_type & VM_PROT_WRITE) == 0) { + /* read fault: cap the protection at readonly */ + /* cap! */ + enter_prot = enter_prot & ~VM_PROT_WRITE; + } else { + /* write fault: must break the loan here */ + + /* alloc new un-owned page */ + pg = uvm_pagealloc(NULL, 0, NULL); + + if (pg == NULL) { + /* + * drop ownership of page, it can't + * be released + * */ + if (uobjpage->flags & PG_WANTED) + thread_wakeup(uobjpage); + uobjpage->flags &= ~(PG_BUSY|PG_WANTED); + UVM_PAGE_OWN(uobjpage, NULL); + + uvm_lock_pageq(); + /* activate: we will need it later */ + uvm_pageactivate(uobjpage); + + uvm_unlock_pageq(); + uvmfault_unlockall(&ufi, amap, uobj, + NULL); + UVMHIST_LOG(maphist, + " out of RAM breaking loan, waiting", 0,0,0,0); + uvmexp.fltnoram++; + uvm_wait("flt_noram4"); + goto ReFault; + } + + /* + * copy the data from the old page to the new + * one and clear the fake/clean flags on the + * new page (keep it busy). force a reload + * of the old page by clearing it from all + * pmaps. then lock the page queues to + * rename the pages. 
+ */ + uvm_pagecopy(uobjpage, pg); /* old -> new */ + pg->flags &= ~(PG_FAKE|PG_CLEAN); + pmap_page_protect(PMAP_PGARG(uobjpage), + VM_PROT_NONE); + if (uobjpage->flags & PG_WANTED) + thread_wakeup(uobjpage); + /* uobj still locked */ + uobjpage->flags &= ~(PG_WANTED|PG_BUSY); + UVM_PAGE_OWN(uobjpage, NULL); + + uvm_lock_pageq(); + offset = uobjpage->offset; + /* remove old page */ + uvm_pagerealloc(uobjpage, NULL, 0); + + /* + * at this point we have absolutely no + * control over uobjpage + */ + /* install new page */ + uvm_pagerealloc(pg, uobj, offset); + uvm_unlock_pageq(); + + /* + * done! loan is broken and "pg" is + * PG_BUSY. it can now replace uobjpage. + */ + + uobjpage = pg; + + } /* write fault case */ + } /* if loan_count */ + + } else { + + /* + * if we are going to promote the data to an anon we + * allocate a blank anon here and plug it into our amap. + */ +#if DIAGNOSTIC + if (amap == NULL) + panic("uvm_fault: want to promote data, but no anon"); +#endif + + anon = uvm_analloc(); + if (anon) + pg = uvm_pagealloc(NULL, 0, anon); /* BUSY+CLEAN+FAKE */ +#ifdef __GNUC__ + else + pg = NULL; /* XXX: gcc */ +#endif + + /* + * out of memory resources? + */ + if (anon == NULL || pg == NULL) { + + /* + * arg! must unbusy our page and fail or sleep. + */ + if (uobjpage != PGO_DONTCARE) { + if (uobjpage->flags & PG_WANTED) + /* still holding object lock */ + thread_wakeup(uobjpage); + + uvm_lock_pageq(); + /* make sure it is in queues */ + uvm_pageactivate(uobjpage); + uvm_unlock_pageq(); + /* un-busy! (still locked) */ + uobjpage->flags &= ~(PG_BUSY|PG_WANTED); + UVM_PAGE_OWN(uobjpage, NULL); + } + + /* unlock and fail ... */ + uvmfault_unlockall(&ufi, amap, uobj, NULL); + if (anon == NULL) { + UVMHIST_LOG(maphist, " promote: out of VM", + 0,0,0,0); + uvmexp.fltnoanon++; + /* XXX: out of VM */ + return (KERN_RESOURCE_SHORTAGE); + } + UVMHIST_LOG(maphist, " out of RAM, waiting for more", + 0,0,0,0); + uvm_anfree(anon); + uvmexp.fltnoram++; + uvm_wait("flt_noram5"); + goto ReFault; + } + + /* + * fill in the data + */ + + if (uobjpage != PGO_DONTCARE) { + uvmexp.flt_prcopy++; + /* copy page [pg now dirty] */ + uvm_pagecopy(uobjpage, pg); + + /* + * promote to shared amap? make sure all sharing + * procs see it + */ + if ((amap_flags(amap) & AMAP_SHARED) != 0) { + pmap_page_protect(PMAP_PGARG(uobjpage), + VM_PROT_NONE); + } + + /* + * dispose of uobjpage. it can't be PG_RELEASED + * since we still hold the object lock. drop + * handle to uobj as well. + */ + + if (uobjpage->flags & PG_WANTED) + /* still have the obj lock */ + thread_wakeup(uobjpage); + uobjpage->flags &= ~(PG_BUSY|PG_WANTED); + UVM_PAGE_OWN(uobjpage, NULL); + uvm_lock_pageq(); + uvm_pageactivate(uobjpage); /* put it back */ + uvm_unlock_pageq(); + simple_unlock(&uobj->vmobjlock); + uobj = NULL; + UVMHIST_LOG(maphist, + " promote uobjpage 0x%x to anon/page 0x%x/0x%x", + uobjpage, anon, pg, 0); + + } else { + uvmexp.flt_przero++; + uvm_pagezero(pg); /* zero page [pg now dirty] */ + UVMHIST_LOG(maphist," zero fill anon/page 0x%x/0%x", + anon, pg, 0, 0); + } + + amap_add(&ufi.entry->aref, ufi.orig_rvaddr - ufi.entry->start, + anon, 0); + + } + + /* + * locked: + * maps(read), amap(if !null), uobj(if !null), uobjpage(if uobj) + * + * note: pg is either the uobjpage or the new page in the new anon + */ + + /* + * all resources are present. we can now map it in and free our + * resources. 
+ */ + + UVMHIST_LOG(maphist, + " MAPPING: case2: pm=0x%x, va=0x%x, pg=0x%x, promote=%d", + ufi.orig_map->pmap, ufi.orig_rvaddr, pg, promote); + pmap_enter(ufi.orig_map->pmap, ufi.orig_rvaddr, VM_PAGE_TO_PHYS(pg), + enter_prot, wired); + + uvm_lock_pageq(); + + if (fault_type == VM_FAULT_WIRE) { + uvm_pagewire(pg); + } else { + + /* activate it */ + uvm_pageactivate(pg); + + } + + uvm_unlock_pageq(); + + if (pg->flags & PG_WANTED) + thread_wakeup(pg); /* lock still held */ + + /* + * note that pg can't be PG_RELEASED since we did not drop the object + * lock since the last time we checked. + */ + + pg->flags &= ~(PG_BUSY|PG_FAKE|PG_WANTED); + UVM_PAGE_OWN(pg, NULL); + uvmfault_unlockall(&ufi, amap, uobj, NULL); + + UVMHIST_LOG(maphist, "<- done (SUCCESS!)",0,0,0,0); + return (KERN_SUCCESS); +} + + +/* + * uvm_fault_wire: wire down a range of virtual addresses in a map. + * + * => map should be locked by caller? If so how can we call + * uvm_fault? WRONG. + * => XXXCDC: locking here is all screwed up!!! start with + * uvm_map_pageable and fix it. + */ + +int +uvm_fault_wire(map, start, end) + vm_map_t map; + vaddr_t start, end; +{ + vaddr_t va; + pmap_t pmap; + int rv; + + pmap = vm_map_pmap(map); + + /* + * call pmap pageable: this tells the pmap layer to lock down these + * page tables. + */ + + pmap_pageable(pmap, start, end, FALSE); + + /* + * now fault it in page at a time. if the fault fails then we have + * to undo what we have done. note that in uvm_fault VM_PROT_NONE + * is replaced with the max protection if fault_type is VM_FAULT_WIRE. + */ + + for (va = start ; va < end ; va += PAGE_SIZE) { + rv = uvm_fault(map, va, VM_FAULT_WIRE, VM_PROT_NONE); + if (rv) { + if (va != start) { + uvm_fault_unwire(map->pmap, start, va); + } + return (rv); + } + } + + return (KERN_SUCCESS); +} + +/* + * uvm_fault_unwire(): unwire range of virtual space. + * + * => caller holds reference to pmap (via its map) + */ + +void +uvm_fault_unwire(pmap, start, end) + struct pmap *pmap; + vaddr_t start, end; +{ + vaddr_t va; + paddr_t pa; + struct vm_page *pg; + + /* + * we assume that the area we are unwiring has actually been wired + * in the first place. this means that we should be able to extract + * the PAs from the pmap. we also lock out the page daemon so that + * we can call uvm_pageunwire. + */ + + uvm_lock_pageq(); + + for (va = start; va < end ; va += PAGE_SIZE) { + pa = pmap_extract(pmap, va); + + /* XXX: assumes PA 0 cannot be in map */ + if (pa == (paddr_t) 0) { + panic("uvm_fault_unwire: unwiring non-wired memory"); + } + pmap_change_wiring(pmap, va, FALSE); /* tell the pmap */ + pg = PHYS_TO_VM_PAGE(pa); + if (pg) + uvm_pageunwire(pg); + } + + uvm_unlock_pageq(); + + /* + * now we call pmap_pageable to let the pmap know that the page tables + * in this space no longer need to be wired. + */ + + pmap_pageable(pmap, start, end, TRUE); + +} diff --git a/sys/uvm/uvm_fault.h b/sys/uvm/uvm_fault.h new file mode 100644 index 00000000000..650543ea669 --- /dev/null +++ b/sys/uvm/uvm_fault.h @@ -0,0 +1,88 @@ +/* $NetBSD: uvm_fault.h,v 1.7 1998/10/11 23:07:42 chuck Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * from: Id: uvm_fault.h,v 1.1.2.2 1997/12/08 16:07:12 chuck Exp + */ + +#ifndef _UVM_UVM_FAULT_H_ +#define _UVM_UVM_FAULT_H_ + +/* + * fault types + */ + +#define VM_FAULT_INVALID ((vm_fault_t) 0x0) /* invalid mapping */ +#define VM_FAULT_PROTECT ((vm_fault_t) 0x1) /* protection */ +#define VM_FAULT_WIRE ((vm_fault_t) 0x2) /* wire mapping */ + +/* + * fault data structures + */ + +/* + * uvm_faultinfo: to load one of these fill in all orig_* fields and + * then call uvmfault_lookup on it. + */ + + +struct uvm_faultinfo { + vm_map_t orig_map; /* IN: original map */ + vaddr_t orig_rvaddr; /* IN: original rounded VA */ + vsize_t orig_size; /* IN: original size of interest */ + vm_map_t map; /* map (could be a submap) */ + unsigned int mapv; /* map's version number */ + vm_map_entry_t entry; /* map entry (from 'map') */ + vsize_t size; /* size of interest */ +}; + +/* + * fault prototypes + */ + + +int uvmfault_anonget __P((struct uvm_faultinfo *, struct vm_amap *, + struct vm_anon *)); +static boolean_t uvmfault_lookup __P((struct uvm_faultinfo *, boolean_t)); +static boolean_t uvmfault_relock __P((struct uvm_faultinfo *)); +static void uvmfault_unlockall __P((struct uvm_faultinfo *, struct vm_amap *, + struct uvm_object *, struct vm_anon *)); +static void uvmfault_unlockmaps __P((struct uvm_faultinfo *, boolean_t)); + +int uvm_fault_wire __P((vm_map_t, vaddr_t, vaddr_t)); +void uvm_fault_unwire __P((struct pmap *, vaddr_t, vaddr_t)); + +#endif /* _UVM_UVM_FAULT_H_ */ diff --git a/sys/uvm/uvm_fault_i.h b/sys/uvm/uvm_fault_i.h new file mode 100644 index 00000000000..40c5cddcef8 --- /dev/null +++ b/sys/uvm/uvm_fault_i.h @@ -0,0 +1,203 @@ +/* $NetBSD: uvm_fault_i.h,v 1.7 1999/01/24 23:53:15 chuck Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. 
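/*
 * illustrative sketch (editorial, not from this import): loading a
 * uvm_faultinfo as the comment above describes -- fill in the orig_*
 * fields, hand it to uvmfault_lookup(), and drop everything with
 * uvmfault_unlockall() when done.  "map" and "vaddr" stand in for the
 * caller's map and fault address; success path only.
 */
	struct uvm_faultinfo ufi;

	ufi.orig_map = map;			/* map to look up in */
	ufi.orig_rvaddr = trunc_page(vaddr);	/* rounded fault address */
	ufi.orig_size = PAGE_SIZE;		/* size of interest */

	if (uvmfault_lookup(&ufi, FALSE) == FALSE)	/* read-lock maps */
		return (KERN_INVALID_ADDRESS);	/* no entry maps the VA */

	/*
	 * maps are now locked; ufi.map, ufi.entry and ufi.size describe
	 * the entry (possibly in a submap) covering orig_rvaddr.
	 */
	uvmfault_unlockall(&ufi, NULL, NULL, NULL);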
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * from: Id: uvm_fault_i.h,v 1.1.6.1 1997/12/08 16:07:12 chuck Exp + */ + +#ifndef _UVM_UVM_FAULT_I_H_ +#define _UVM_UVM_FAULT_I_H_ + +/* + * uvm_fault_i.h: fault inline functions + */ + +/* + * uvmfault_unlockmaps: unlock the maps + */ + +static __inline void +uvmfault_unlockmaps(ufi, write_locked) + struct uvm_faultinfo *ufi; + boolean_t write_locked; +{ + + if (write_locked) { + vm_map_unlock(ufi->map); + } else { + vm_map_unlock_read(ufi->map); + } +} + +/* + * uvmfault_unlockall: unlock everything passed in. + * + * => maps must be read-locked (not write-locked). + */ + +static __inline void +uvmfault_unlockall(ufi, amap, uobj, anon) + struct uvm_faultinfo *ufi; + struct vm_amap *amap; + struct uvm_object *uobj; + struct vm_anon *anon; +{ + + if (anon) + simple_unlock(&anon->an_lock); + if (uobj) + simple_unlock(&uobj->vmobjlock); + if (amap) + amap_unlock(amap); + uvmfault_unlockmaps(ufi, FALSE); +} + +/* + * uvmfault_lookup: lookup a virtual address in a map + * + * => caller must provide a uvm_faultinfo structure with the IN + * params properly filled in + * => we will lookup the map entry (handling submaps) as we go + * => if the lookup is a success we will return with the maps locked + * => if "write_lock" is TRUE, we write_lock the map, otherwise we only + * get a read lock. + * => note that submaps can only appear in the kernel and they are + * required to use the same virtual addresses as the map they + * are referenced by (thus address translation between the main + * map and the submap is unnecessary). + */ + +static __inline boolean_t +uvmfault_lookup(ufi, write_lock) + struct uvm_faultinfo *ufi; + boolean_t write_lock; +{ + vm_map_t tmpmap; + + /* + * init ufi values for lookup. + */ + + ufi->map = ufi->orig_map; + ufi->size = ufi->orig_size; + + /* + * keep going down levels until we are done. note that there can + * only be two levels so we won't loop very long. 
+ */ + + while (1) { + + /* + * lock map + */ + if (write_lock) { + vm_map_lock(ufi->map); + } else { + vm_map_lock_read(ufi->map); + } + + /* + * lookup + */ + if (!uvm_map_lookup_entry(ufi->map, ufi->orig_rvaddr, + &ufi->entry)) { + uvmfault_unlockmaps(ufi, write_lock); + return(FALSE); + } + + /* + * reduce size if necessary + */ + if (ufi->entry->end - ufi->orig_rvaddr < ufi->size) + ufi->size = ufi->entry->end - ufi->orig_rvaddr; + + /* + * submap? replace map with the submap and lookup again. + * note: VAs in submaps must match VAs in main map. + */ + if (UVM_ET_ISSUBMAP(ufi->entry)) { + tmpmap = ufi->entry->object.sub_map; + if (write_lock) { + vm_map_unlock(ufi->map); + } else { + vm_map_unlock_read(ufi->map); + } + ufi->map = tmpmap; + continue; + } + + /* + * got it! + */ + + ufi->mapv = ufi->map->timestamp; + return(TRUE); + + } /* while loop */ + + /*NOTREACHED*/ +} + +/* + * uvmfault_relock: attempt to relock the same version of the map + * + * => fault data structures should be unlocked before calling. + * => if a success (TRUE) maps will be locked after call. + */ + +static __inline boolean_t +uvmfault_relock(ufi) + struct uvm_faultinfo *ufi; +{ + + uvmexp.fltrelck++; + /* + * relock map. fail if version mismatch (in which case nothing + * gets locked). + */ + + vm_map_lock_read(ufi->map); + if (ufi->mapv != ufi->map->timestamp) { + vm_map_unlock_read(ufi->map); + return(FALSE); + } + + uvmexp.fltrelckok++; + return(TRUE); /* got it! */ +} + +#endif /* _UVM_UVM_FAULT_I_H_ */ diff --git a/sys/uvm/uvm_glue.c b/sys/uvm/uvm_glue.c new file mode 100644 index 00000000000..b46fd012c16 --- /dev/null +++ b/sys/uvm/uvm_glue.c @@ -0,0 +1,605 @@ +/* $NetBSD: uvm_glue.c,v 1.15 1998/10/19 22:21:19 tron Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * Copyright (c) 1991, 1993, The Regents of the University of California. + * + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor, + * Washington University, the University of California, Berkeley and + * its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
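/*
 * illustrative sketch (editorial, not from this import): the usual
 * unlock / sleep-or-I/O / relock dance that uvmfault_relock() is
 * built for, assuming a prior successful uvmfault_lookup().
 */
	/* drop all fault data structure locks before blocking */
	uvmfault_unlockall(&ufi, amap, uobj, anon);

	/* ... sleep, or wait for pager I/O to finish ... */

	if (uvmfault_relock(&ufi) == FALSE) {
		/*
		 * map version changed while we slept: nothing is locked,
		 * so restart the fault from the top (cf. "goto ReFault"
		 * in uvm_fault.c above).
		 */
		goto ReFault;
	}
	/* maps read-locked again; re-lock amap/object and carry on */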
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)vm_glue.c 8.6 (Berkeley) 1/5/94 + * from: Id: uvm_glue.c,v 1.1.2.8 1998/02/07 01:16:54 chs Exp + * + * + * Copyright (c) 1987, 1990 Carnegie-Mellon University. + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and + * its documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie the + * rights to redistribute these changes. + */ + +/* + * uvm_glue.c: glue functions + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/resourcevar.h> +#include <sys/buf.h> +#include <sys/user.h> +#ifdef SYSVSHM +#include <sys/shm.h> +#endif + +#include <vm/vm.h> +#include <vm/vm_page.h> +#include <vm/vm_kern.h> + +#include <uvm/uvm.h> + +#include <machine/cpu.h> + +/* + * local prototypes + */ + +static void uvm_swapout __P((struct proc *)); + +/* + * XXXCDC: do these really belong here? + */ + +unsigned maxdmap = MAXDSIZ; /* kern_resource.c: RLIMIT_DATA max */ +unsigned maxsmap = MAXSSIZ; /* kern_resource.c: RLIMIT_STACK max */ + +int readbuffers = 0; /* allow KGDB to read kern buffer pool */ + /* XXX: see uvm_kernacc */ + + +/* + * uvm_kernacc: can the kernel access a region of memory + * + * - called from malloc [DIAGNOSTIC], and /dev/kmem driver (mem.c) + */ + +boolean_t +uvm_kernacc(addr, len, rw) + caddr_t addr; + size_t len; + int rw; +{ + boolean_t rv; + vaddr_t saddr, eaddr; + vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE; + + saddr = trunc_page(addr); + eaddr = round_page(addr+len); + vm_map_lock_read(kernel_map); + rv = uvm_map_checkprot(kernel_map, saddr, eaddr, prot); + vm_map_unlock_read(kernel_map); + + /* + * XXX there are still some things (e.g. the buffer cache) that + * are managed behind the VM system's back so even though an + * address is accessible in the mind of the VM system, there may + * not be physical pages where the VM thinks there is. This can + * lead to bogus allocation of pages in the kernel address space + * or worse, inconsistencies at the pmap level. We only worry + * about the buffer cache for now. + */ + if (!readbuffers && rv && (eaddr > (vaddr_t)buffers && + saddr < (vaddr_t)buffers + MAXBSIZE * nbuf)) + rv = FALSE; + return(rv); +} + +/* + * uvm_useracc: can the user access it? 
+ * + * - called from physio() and sys___sysctl(). + */ + +boolean_t +uvm_useracc(addr, len, rw) + caddr_t addr; + size_t len; + int rw; +{ + boolean_t rv; + vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE; + +#if defined(i386) || defined(pc532) + /* + * XXX - specially disallow access to user page tables - they are + * in the map. This is here until i386 & pc532 pmaps are fixed... + */ + if ((vaddr_t) addr >= VM_MAXUSER_ADDRESS + || (vaddr_t) addr + len > VM_MAXUSER_ADDRESS + || (vaddr_t) addr + len <= (vaddr_t) addr) + return (FALSE); +#endif + + rv = uvm_map_checkprot(&curproc->p_vmspace->vm_map, + trunc_page(addr), round_page(addr+len), prot); + return(rv); +} + +#ifdef KGDB +/* + * Change protections on kernel pages from addr to addr+len + * (presumably so debugger can plant a breakpoint). + * + * We force the protection change at the pmap level. If we were + * to use vm_map_protect a change to allow writing would be lazily- + * applied meaning we would still take a protection fault, something + * we really don't want to do. It would also fragment the kernel + * map unnecessarily. We cannot use pmap_protect since it also won't + * enforce a write-enable request. Using pmap_enter is the only way + * we can ensure the change takes place properly. + */ +void +uvm_chgkprot(addr, len, rw) + register caddr_t addr; + size_t len; + int rw; +{ + vm_prot_t prot; + paddr_t pa; + vaddr_t sva, eva; + + prot = rw == B_READ ? VM_PROT_READ : VM_PROT_READ|VM_PROT_WRITE; + eva = round_page(addr + len); + for (sva = trunc_page(addr); sva < eva; sva += PAGE_SIZE) { + /* + * Extract physical address for the page. + * We use a cheezy hack to differentiate physical + * page 0 from an invalid mapping, not that it + * really matters... + */ + pa = pmap_extract(pmap_kernel(), sva|1); + if (pa == 0) + panic("chgkprot: invalid page"); + pmap_enter(pmap_kernel(), sva, pa&~1, prot, TRUE); + } +} +#endif + +/* + * vslock: wire user memory for I/O + * + * - called from physio and sys___sysctl + * - XXXCDC: consider nuking this (or making it a macro?) + */ + +void +uvm_vslock(p, addr, len) + struct proc *p; + caddr_t addr; + size_t len; +{ + uvm_fault_wire(&p->p_vmspace->vm_map, trunc_page(addr), + round_page(addr+len)); +} + +/* + * vslock: wire user memory for I/O + * + * - called from physio and sys___sysctl + * - XXXCDC: consider nuking this (or making it a macro?) + */ + +void +uvm_vsunlock(p, addr, len) + struct proc *p; + caddr_t addr; + size_t len; +{ + uvm_fault_unwire(p->p_vmspace->vm_map.pmap, trunc_page(addr), + round_page(addr+len)); +} + +/* + * uvm_fork: fork a virtual address space + * + * - the address space is copied as per parent map's inherit values + * - a new "user" structure is allocated for the child process + * [filled in by MD layer...] + * - NOTE: the kernel stack may be at a different location in the child + * process, and thus addresses of automatic variables may be invalid + * after cpu_fork returns in the child process. We do nothing here + * after cpu_fork returns. + * - XXXCDC: we need a way for this to return a failure value rather + * than just hang + */ +void +uvm_fork(p1, p2, shared) + struct proc *p1, *p2; + boolean_t shared; +{ + struct user *up = p2->p_addr; + int rv; + + if (shared == TRUE) + uvmspace_share(p1, p2); /* share vmspace */ + else + p2->p_vmspace = uvmspace_fork(p1->p_vmspace); /* fork vmspace */ + + /* + * Wire down the U-area for the process, which contains the PCB + * and the kernel stack. 
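/*
 * illustrative sketch (editorial, not from this import): the
 * wire-around-I/O pattern that uvm_vslock()/uvm_vsunlock() above
 * exist for, e.g. around a physio()-style transfer on a user buffer.
 * "p", "buf" and "len" stand in for the caller's process, buffer
 * address and length.
 */
	uvm_vslock(p, buf, len);	/* fault in and wire the user pages */

	/* ... perform the raw I/O on the now-resident buffer ... */

	uvm_vsunlock(p, buf, len);	/* unwire when the I/O is done */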
Wired state is stored in p->p_flag's + * P_INMEM bit rather than in the vm_map_entry's wired count + * to prevent kernel_map fragmentation. + */ + rv = uvm_fault_wire(kernel_map, (vaddr_t)up, + (vaddr_t)up + USPACE); + if (rv != KERN_SUCCESS) + panic("uvm_fork: uvm_fault_wire failed: %d", rv); + + /* + * p_stats and p_sigacts currently point at fields in the user + * struct but not at &u, instead at p_addr. Copy p_sigacts and + * parts of p_stats; zero the rest of p_stats (statistics). + */ + p2->p_stats = &up->u_stats; + p2->p_sigacts = &up->u_sigacts; + up->u_sigacts = *p1->p_sigacts; + bzero(&up->u_stats.pstat_startzero, + (unsigned) ((caddr_t)&up->u_stats.pstat_endzero - + (caddr_t)&up->u_stats.pstat_startzero)); + bcopy(&p1->p_stats->pstat_startcopy, &up->u_stats.pstat_startcopy, + ((caddr_t)&up->u_stats.pstat_endcopy - + (caddr_t)&up->u_stats.pstat_startcopy)); + +/* + * cpu_fork will copy and update the kernel stack and pcb, and make + * the child ready to run. The child will exit directly to user + * mode on its first time slice, and will not return here. + */ + cpu_fork(p1, p2); +} + +/* + * uvm_exit: exit a virtual address space + * + * - the process passed to us is a dead (pre-zombie) process; we + * are running on a different context now (the reaper). + * - we must run in a separate thread because freeing the vmspace + * of the dead process may block. + */ +void +uvm_exit(p) + struct proc *p; +{ + + uvmspace_free(p->p_vmspace); + uvm_km_free(kernel_map, (vaddr_t)p->p_addr, USPACE); +} + +/* + * uvm_init_limit: init per-process VM limits + * + * - called for process 0 and then inherited by all others. + */ +void +uvm_init_limits(p) + struct proc *p; +{ + + /* + * Set up the initial limits on process VM. Set the maximum + * resident set size to be all of (reasonably) available memory. + * This causes any single, large process to start random page + * replacement once it fills memory. + */ + + p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ; + p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ; + p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ; + p->p_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ; + p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(uvmexp.free); +} + +#ifdef DEBUG +int enableswap = 1; +int swapdebug = 0; +#define SDB_FOLLOW 1 +#define SDB_SWAPIN 2 +#define SDB_SWAPOUT 4 +#endif + +/* + * uvm_swapin: swap in a process's u-area. + */ + +void +uvm_swapin(p) + struct proc *p; +{ + vaddr_t addr; + int s; + + addr = (vaddr_t)p->p_addr; + /* make P_INMEM true */ + uvm_fault_wire(kernel_map, addr, addr + USPACE); + + /* + * Some architectures need to be notified when the user area has + * moved to new physical page(s) (e.g. see mips/mips/vm_machdep.c). + */ + cpu_swapin(p); + s = splstatclock(); + if (p->p_stat == SRUN) + setrunqueue(p); + p->p_flag |= P_INMEM; + splx(s); + p->p_swtime = 0; + ++uvmexp.swapins; +} + +/* + * uvm_scheduler: process zero main loop + * + * - attempt to swapin every swaped-out, runnable process in order of + * priority. + * - if not enough memory, wake the pagedaemon and let it clear space. + */ + +void +uvm_scheduler() +{ + register struct proc *p; + register int pri; + struct proc *pp; + int ppri; + UVMHIST_FUNC("uvm_scheduler"); UVMHIST_CALLED(maphist); + +loop: +#ifdef DEBUG + while (!enableswap) + tsleep((caddr_t)&proc0, PVM, "noswap", 0); +#endif + pp = NULL; /* process to choose */ + ppri = INT_MIN; /* its priority */ + for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { + + /* is it a runnable swapped out process? 
*/ + if (p->p_stat == SRUN && (p->p_flag & P_INMEM) == 0) { + pri = p->p_swtime + p->p_slptime - + (p->p_nice - NZERO) * 8; + if (pri > ppri) { /* higher priority? remember it. */ + pp = p; + ppri = pri; + } + } + } + +#ifdef DEBUG + if (swapdebug & SDB_FOLLOW) + printf("scheduler: running, procp %p pri %d\n", pp, ppri); +#endif + /* + * Nothing to do, back to sleep + */ + if ((p = pp) == NULL) { + tsleep((caddr_t)&proc0, PVM, "scheduler", 0); + goto loop; + } + + /* + * we have found swapped out process which we would like to bring + * back in. + * + * XXX: this part is really bogus cuz we could deadlock on memory + * despite our feeble check + */ + if (uvmexp.free > atop(USPACE)) { +#ifdef DEBUG + if (swapdebug & SDB_SWAPIN) + printf("swapin: pid %d(%s)@%p, pri %d free %d\n", + p->p_pid, p->p_comm, p->p_addr, ppri, uvmexp.free); +#endif + uvm_swapin(p); + goto loop; + } + /* + * not enough memory, jab the pageout daemon and wait til the coast + * is clear + */ +#ifdef DEBUG + if (swapdebug & SDB_FOLLOW) + printf("scheduler: no room for pid %d(%s), free %d\n", + p->p_pid, p->p_comm, uvmexp.free); +#endif + printf("scheduler: no room for pid %d(%s), free %d\n", + p->p_pid, p->p_comm, uvmexp.free);/*XXXCDC: HIGHLY BOGUS */ + (void) splhigh(); + uvm_wait("schedpwait"); + (void) spl0(); +#ifdef DEBUG + if (swapdebug & SDB_FOLLOW) + printf("scheduler: room again, free %d\n", uvmexp.free); +#endif + goto loop; +} + +/* + * swappable: is process "p" swappable? + */ + +#define swappable(p) \ + (((p)->p_flag & (P_SYSTEM | P_INMEM | P_WEXIT)) == P_INMEM && \ + (p)->p_holdcnt == 0) + +/* + * swapout_threads: find threads that can be swapped and unwire their + * u-areas. + * + * - called by the pagedaemon + * - try and swap at least one processs + * - processes that are sleeping or stopped for maxslp or more seconds + * are swapped... otherwise the longest-sleeping or stopped process + * is swapped, otherwise the longest resident process... + */ +void +uvm_swapout_threads() +{ + register struct proc *p; + struct proc *outp, *outp2; + int outpri, outpri2; + int didswap = 0; + extern int maxslp; + /* XXXCDC: should move off to uvmexp. or uvm., also in uvm_meter */ + +#ifdef DEBUG + if (!enableswap) + return; +#endif + + /* + * outp/outpri : stop/sleep process with largest sleeptime < maxslp + * outp2/outpri2: the longest resident process (its swap time) + */ + outp = outp2 = NULL; + outpri = outpri2 = 0; + for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { + if (!swappable(p)) + continue; + switch (p->p_stat) { + case SRUN: + if (p->p_swtime > outpri2) { + outp2 = p; + outpri2 = p->p_swtime; + } + continue; + + case SSLEEP: + case SSTOP: + if (p->p_slptime >= maxslp) { + uvm_swapout(p); /* zap! */ + didswap++; + } else if (p->p_slptime > outpri) { + outp = p; + outpri = p->p_slptime; + } + continue; + } + } + + /* + * If we didn't get rid of any real duds, toss out the next most + * likely sleeping/stopped or running candidate. We only do this + * if we are real low on memory since we don't gain much by doing + * it (USPACE bytes). + */ + if (didswap == 0 && uvmexp.free <= atop(round_page(USPACE))) { + if ((p = outp) == NULL) + p = outp2; +#ifdef DEBUG + if (swapdebug & SDB_SWAPOUT) + printf("swapout_threads: no duds, try procp %p\n", p); +#endif + if (p) + uvm_swapout(p); + } +} + +/* + * uvm_swapout: swap out process "p" + * + * - currently "swapout" means "unwire U-area" and "pmap_collect()" + * the pmap. 
+ * - XXXCDC: should deactivate all process' private anonymous memory + */ + +static void +uvm_swapout(p) + register struct proc *p; +{ + vaddr_t addr; + int s; + +#ifdef DEBUG + if (swapdebug & SDB_SWAPOUT) + printf("swapout: pid %d(%s)@%p, stat %x pri %d free %d\n", + p->p_pid, p->p_comm, p->p_addr, p->p_stat, + p->p_slptime, uvmexp.free); +#endif + + /* + * Do any machine-specific actions necessary before swapout. + * This can include saving floating point state, etc. + */ + cpu_swapout(p); + + /* + * Unwire the to-be-swapped process's user struct and kernel stack. + */ + addr = (vaddr_t)p->p_addr; + uvm_fault_unwire(kernel_map->pmap, addr, addr + USPACE); /* !P_INMEM */ + pmap_collect(vm_map_pmap(&p->p_vmspace->vm_map)); + + /* + * Mark it as (potentially) swapped out. + */ + s = splstatclock(); + p->p_flag &= ~P_INMEM; + if (p->p_stat == SRUN) + remrunqueue(p); + splx(s); + p->p_swtime = 0; + ++uvmexp.swapouts; +} + diff --git a/sys/uvm/uvm_glue.h b/sys/uvm/uvm_glue.h new file mode 100644 index 00000000000..8a137800fcd --- /dev/null +++ b/sys/uvm/uvm_glue.h @@ -0,0 +1,50 @@ +/* $NetBSD: uvm_glue.h,v 1.4 1998/02/10 02:34:37 perry Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * from: Id: uvm_glue.h,v 1.1.2.1 1997/08/14 19:10:48 chuck Exp + */ + +#ifndef _UVM_UVM_GLUE_H_ +#define _UVM_UVM_GLUE_H_ + +/* + * uvm_glue.h + */ + +void uvm_swapout_threads __P((void)); + +#endif /* _UVM_UVM_GLUE_H_ */ diff --git a/sys/uvm/uvm_init.c b/sys/uvm/uvm_init.c new file mode 100644 index 00000000000..95406c95b0c --- /dev/null +++ b/sys/uvm/uvm_init.c @@ -0,0 +1,167 @@ +/* $NetBSD: uvm_init.c,v 1.10 1999/01/24 23:53:15 chuck Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! 
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * from: Id: uvm_init.c,v 1.1.2.3 1998/02/06 05:15:27 chs Exp + */ + +/* + * uvm_init.c: init the vm system. + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/file.h> +#include <sys/filedesc.h> +#include <sys/resourcevar.h> +#include <sys/mman.h> +#include <sys/proc.h> +#include <sys/malloc.h> +#include <sys/vnode.h> +#include <sys/conf.h> + + +#include <vm/vm.h> +#include <vm/vm_page.h> +#include <vm/vm_kern.h> + +#include <uvm/uvm.h> + +/* + * struct uvm: we store all global vars in this structure to make them + * easier to spot... + */ + +struct uvm uvm; /* decl */ +struct uvmexp uvmexp; /* decl */ + +/* + * local prototypes + */ + +/* + * uvm_init: init the VM system. called from kern/init_main.c. + */ + +void +uvm_init() +{ + vaddr_t kvm_start, kvm_end; + + /* + * step 0: ensure that the hardware set the page size + */ + + if (uvmexp.pagesize == 0) { + panic("uvm_init: page size not set"); + } + + /* + * step 1: zero the uvm structure + */ + + bzero(&uvm, sizeof(uvm)); + averunnable.fscale = FSCALE; + + /* + * step 2: init the page sub-system. this includes allocating the + * vm_page structures, and setting up all the page queues (and + * locks). available memory will be put in the "free" queue. + * kvm_start and kvm_end will be set to the area of kernel virtual + * memory which is available for general use. + */ + + uvm_page_init(&kvm_start, &kvm_end); + + /* + * step 3: init the map sub-system. allocates the static pool of + * vm_map_entry structures that are used for "special" kernel maps + * (e.g. kernel_map, kmem_map, etc...). + */ + + uvm_map_init(); + + /* + * step 4: setup the kernel's virtual memory data structures. 
this + * includes setting up the kernel_map/kernel_object and the kmem_map/ + * kmem_object. + */ + + uvm_km_init(kvm_start, kvm_end); + + /* + * step 5: init the pmap module. the pmap module is free to allocate + * memory for its private use (e.g. pvlists). + */ + + pmap_init(); + + /* + * step 6: init the kernel memory allocator. after this call the + * kernel memory allocator (malloc) can be used. + */ + + kmeminit(); + + /* + * step 7: init all pagers and the pager_map. + */ + + uvm_pager_init(); + + /* + * step 8: init anonymous memory systems (both amap and anons) + */ + + amap_init(); /* init amap module */ + uvm_anon_init(); /* allocate initial anons */ + + /* + * the VM system is now up! now that malloc is up we can resize the + * <obj,off> => <page> hash table for general use and enable paging + * of kernel objects. + */ + + uvm_page_rehash(); + uao_create(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS, + UAO_FLAG_KERNSWAP); + + /* + * done! + */ + + return; +} diff --git a/sys/uvm/uvm_io.c b/sys/uvm/uvm_io.c new file mode 100644 index 00000000000..603e04b26d9 --- /dev/null +++ b/sys/uvm/uvm_io.c @@ -0,0 +1,163 @@ +/* $NetBSD: uvm_io.c,v 1.7 1998/10/11 23:18:20 chuck Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * from: Id: uvm_io.c,v 1.1.2.2 1997/12/30 12:02:00 mrg Exp + */ + +/* + * uvm_io.c: uvm i/o ops + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/mman.h> +#include <sys/proc.h> +#include <sys/malloc.h> +#include <sys/uio.h> + +#include <vm/vm.h> +#include <vm/vm_page.h> +#include <vm/vm_kern.h> + +#include <uvm/uvm.h> + +/* + * functions + */ + +/* + * uvm_io: perform I/O on a map + * + * => caller must have a reference to "map" so that it doesn't go away + * while we are working. + */ + +int +uvm_io(map, uio) + vm_map_t map; + struct uio *uio; +{ + vaddr_t baseva, endva, pageoffset, kva; + vsize_t chunksz, togo, sz; + vm_map_entry_t dead_entries; + int error; + + /* + * step 0: sanity checks and set up for copy loop. start with a + * large chunk size. if we have trouble finding vm space we will + * reduce it. + */ + + if (uio->uio_resid == 0) + return(0); + togo = uio->uio_resid; + + baseva = (vaddr_t) uio->uio_offset; + endva = baseva + (togo - 1); + + if (endva < baseva) /* wrap around? */ + return(EIO); + + if (baseva >= VM_MAXUSER_ADDRESS) + return(0); + if (endva >= VM_MAXUSER_ADDRESS) + /* EOF truncate */ + togo = togo - (endva - VM_MAXUSER_ADDRESS + 1); + pageoffset = baseva & PAGE_MASK; + baseva = trunc_page(baseva); + chunksz = min(round_page(togo + pageoffset), MAXBSIZE); + error = 0; + + /* + * step 1: main loop... while we've got data to move + */ + + for (/*null*/; togo > 0 ; pageoffset = 0) { + + /* + * step 2: extract mappings from the map into kernel_map + */ + + error = uvm_map_extract(map, baseva, chunksz, kernel_map, &kva, + UVM_EXTRACT_QREF | UVM_EXTRACT_CONTIG | + UVM_EXTRACT_FIXPROT); + if (error) { + + /* retry with a smaller chunk... */ + if (error == ENOMEM && chunksz > PAGE_SIZE) { + chunksz = trunc_page(chunksz / 2); + if (chunksz < PAGE_SIZE) + chunksz = PAGE_SIZE; + continue; + } + + break; + } + + /* + * step 3: move a chunk of data + */ + + sz = chunksz - pageoffset; + if (sz > togo) + sz = togo; + error = uiomove((caddr_t) (kva + pageoffset), sz, uio); + if (error) + break; + togo -= sz; + baseva += chunksz; + + + /* + * step 4: unmap the area of kernel memory + */ + + vm_map_lock(kernel_map); + (void)uvm_unmap_remove(kernel_map, kva, kva+chunksz, + &dead_entries); + vm_map_unlock(kernel_map); + + if (dead_entries != NULL) + uvm_unmap_detach(dead_entries, AMAP_REFALL); + } + + /* + * done + */ + + return (error); +} diff --git a/sys/uvm/uvm_km.c b/sys/uvm/uvm_km.c new file mode 100644 index 00000000000..49e9e5191bc --- /dev/null +++ b/sys/uvm/uvm_km.c @@ -0,0 +1,1081 @@ +/* $NetBSD: uvm_km.c,v 1.18 1998/10/18 23:49:59 chs Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * Copyright (c) 1991, 1993, The Regents of the University of California. + * + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor, + * Washington University, the University of California, Berkeley and + * its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)vm_kern.c 8.3 (Berkeley) 1/12/94 + * from: Id: uvm_km.c,v 1.1.2.14 1998/02/06 05:19:27 chs Exp + * + * + * Copyright (c) 1987, 1990 Carnegie-Mellon University. + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and + * its documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie the + * rights to redistribute these changes. + */ + +/* + * uvm_km.c: handle kernel memory allocation and management + */ + +/* + * overview of kernel memory management: + * + * the kernel virtual address space is mapped by "kernel_map." kernel_map + * starts at VM_MIN_KERNEL_ADDRESS and goes to VM_MAX_KERNEL_ADDRESS. + * note that VM_MIN_KERNEL_ADDRESS is equal to vm_map_min(kernel_map). + * + * the kernel_map has several "submaps." submaps can only appear in + * the kernel_map (user processes can't use them). submaps "take over" + * the management of a sub-range of the kernel's address space. submaps + * are typically allocated at boot time and are never released. kernel + * virtual address space that is mapped by a submap is locked by the + * submap's lock -- not the kernel_map's lock. + * + * thus, the useful feature of submaps is that they allow us to break + * up the locking and protection of the kernel address space into smaller + * chunks. 
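/*
 * illustrative sketch (editorial, not from this import): carving a
 * wired-memory submap out of kernel_map at boot time with
 * uvm_km_suballoc() (defined later in this file).  the size and the
 * variable names here are made up for illustration.
 */
	vaddr_t minaddr, maxaddr;
	struct vm_map *my_submap;

	/*
	 * reserve a chunk of kernel VA and hand its management to a new
	 * submap.  from here on, mappings in [minaddr, maxaddr) are
	 * serialized by my_submap's lock instead of kernel_map's lock.
	 */
	my_submap = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
	    16 * PAGE_SIZE, FALSE, FALSE, NULL);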
+ * + * the vm system has several standard kernel submaps, including: + * kmem_map => contains only wired kernel memory for the kernel + * malloc. *** access to kmem_map must be protected + * by splimp() because we are allowed to call malloc() + * at interrupt time *** + * mb_map => memory for large mbufs, *** protected by splimp *** + * pager_map => used to map "buf" structures into kernel space + * exec_map => used during exec to handle exec args + * etc... + * + * the kernel allocates its private memory out of special uvm_objects whose + * reference count is set to UVM_OBJ_KERN (thus indicating that the objects + * are "special" and never die). all kernel objects should be thought of + * as large, fixed-sized, sparsely populated uvm_objects. each kernel + * object is equal to the size of kernel virtual address space (i.e. the + * value "VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS"). + * + * most kernel private memory lives in kernel_object. the only exception + * to this is for memory that belongs to submaps that must be protected + * by splimp(). each of these submaps has their own private kernel + * object (e.g. kmem_object, mb_object). + * + * note that just because a kernel object spans the entire kernel virutal + * address space doesn't mean that it has to be mapped into the entire space. + * large chunks of a kernel object's space go unused either because + * that area of kernel VM is unmapped, or there is some other type of + * object mapped into that range (e.g. a vnode). for submap's kernel + * objects, the only part of the object that can ever be populated is the + * offsets that are managed by the submap. + * + * note that the "offset" in a kernel object is always the kernel virtual + * address minus the VM_MIN_KERNEL_ADDRESS (aka vm_map_min(kernel_map)). + * example: + * suppose VM_MIN_KERNEL_ADDRESS is 0xf8000000 and the kernel does a + * uvm_km_alloc(kernel_map, PAGE_SIZE) [allocate 1 wired down page in the + * kernel map]. if uvm_km_alloc returns virtual address 0xf8235000, + * then that means that the page at offset 0x235000 in kernel_object is + * mapped at 0xf8235000. + * + * note that the offsets in kmem_object and mb_object also follow this + * rule. this means that the offsets for kmem_object must fall in the + * range of [vm_map_min(kmem_object) - vm_map_min(kernel_map)] to + * [vm_map_max(kmem_object) - vm_map_min(kernel_map)], so the offsets + * in those objects will typically not start at zero. + * + * kernel object have one other special property: when the kernel virtual + * memory mapping them is unmapped, the backing memory in the object is + * freed right away. this is done with the uvm_km_pgremove() function. + * this has to be done because there is no backing store for kernel pages + * and no need to save them after they are no longer referenced. 
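/*
 * illustrative sketch (editorial, not from this import): the offset
 * rule described above as a tiny helper.  e.g. with
 * VM_MIN_KERNEL_ADDRESS == 0xf8000000, a page mapped at 0xf8235000
 * lives at offset 0x235000 in kernel_object.
 */
static __inline vaddr_t
kva_to_kernel_object_offset(kva)
	vaddr_t kva;
{
	/* kernel object offset == KVA - vm_map_min(kernel_map) */
	return (kva - vm_map_min(kernel_map));
}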
+ */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> + +#include <vm/vm.h> +#include <vm/vm_page.h> +#include <vm/vm_kern.h> + +#include <uvm/uvm.h> + +/* + * global data structures + */ + +vm_map_t kernel_map = NULL; + +/* + * local functions + */ + +static int uvm_km_get __P((struct uvm_object *, vaddr_t, + vm_page_t *, int *, int, vm_prot_t, int, int)); +/* + * local data structues + */ + +static struct vm_map kernel_map_store; +static struct uvm_object kmem_object_store; +static struct uvm_object mb_object_store; + +static struct uvm_pagerops km_pager = { + NULL, /* init */ + NULL, /* attach */ + NULL, /* reference */ + NULL, /* detach */ + NULL, /* fault */ + NULL, /* flush */ + uvm_km_get, /* get */ + /* ... rest are NULL */ +}; + +/* + * uvm_km_get: pager get function for kernel objects + * + * => currently we do not support pageout to the swap area, so this + * pager is very simple. eventually we may want an anonymous + * object pager which will do paging. + * => XXXCDC: this pager should be phased out in favor of the aobj pager + */ + + +static int +uvm_km_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags) + struct uvm_object *uobj; + vaddr_t offset; + struct vm_page **pps; + int *npagesp; + int centeridx, advice, flags; + vm_prot_t access_type; +{ + vaddr_t current_offset; + vm_page_t ptmp; + int lcv, gotpages, maxpages; + boolean_t done; + UVMHIST_FUNC("uvm_km_get"); UVMHIST_CALLED(maphist); + + UVMHIST_LOG(maphist, "flags=%d", flags,0,0,0); + + /* + * get number of pages + */ + + maxpages = *npagesp; + + /* + * step 1: handled the case where fault data structures are locked. + */ + + if (flags & PGO_LOCKED) { + + /* + * step 1a: get pages that are already resident. only do + * this if the data structures are locked (i.e. the first time + * through). + */ + + done = TRUE; /* be optimistic */ + gotpages = 0; /* # of pages we got so far */ + + for (lcv = 0, current_offset = offset ; + lcv < maxpages ; lcv++, current_offset += PAGE_SIZE) { + + /* do we care about this page? if not, skip it */ + if (pps[lcv] == PGO_DONTCARE) + continue; + + /* lookup page */ + ptmp = uvm_pagelookup(uobj, current_offset); + + /* null? attempt to allocate the page */ + if (ptmp == NULL) { + ptmp = uvm_pagealloc(uobj, current_offset, + NULL); + if (ptmp) { + /* new page */ + ptmp->flags &= ~(PG_BUSY|PG_FAKE); + UVM_PAGE_OWN(ptmp, NULL); + uvm_pagezero(ptmp); + } + } + + /* + * to be useful must get a non-busy, non-released page + */ + if (ptmp == NULL || + (ptmp->flags & (PG_BUSY|PG_RELEASED)) != 0) { + if (lcv == centeridx || + (flags & PGO_ALLPAGES) != 0) + /* need to do a wait or I/O! */ + done = FALSE; + continue; + } + + /* + * useful page: busy/lock it and plug it in our + * result array + */ + + /* caller must un-busy this page */ + ptmp->flags |= PG_BUSY; + UVM_PAGE_OWN(ptmp, "uvm_km_get1"); + pps[lcv] = ptmp; + gotpages++; + + } /* "for" lcv loop */ + + /* + * step 1b: now we've either done everything needed or we + * to unlock and do some waiting or I/O. + */ + + UVMHIST_LOG(maphist, "<- done (done=%d)", done, 0,0,0); + + *npagesp = gotpages; + if (done) + return(VM_PAGER_OK); /* bingo! */ + else + return(VM_PAGER_UNLOCK); /* EEK! Need to + * unlock and I/O */ + } + + /* + * step 2: get non-resident or busy pages. + * object is locked. data structures are unlocked. 
+ */ + + for (lcv = 0, current_offset = offset ; + lcv < maxpages ; lcv++, current_offset += PAGE_SIZE) { + + /* skip over pages we've already gotten or don't want */ + /* skip over pages we don't _have_ to get */ + if (pps[lcv] != NULL || + (lcv != centeridx && (flags & PGO_ALLPAGES) == 0)) + continue; + + /* + * we have yet to locate the current page (pps[lcv]). we + * first look for a page that is already at the current offset. + * if we find a page, we check to see if it is busy or + * released. if that is the case, then we sleep on the page + * until it is no longer busy or released and repeat the + * lookup. if the page we found is neither busy nor + * released, then we busy it (so we own it) and plug it into + * pps[lcv]. this 'break's the following while loop and + * indicates we are ready to move on to the next page in the + * "lcv" loop above. + * + * if we exit the while loop with pps[lcv] still set to NULL, + * then it means that we allocated a new busy/fake/clean page + * ptmp in the object and we need to do I/O to fill in the + * data. + */ + + while (pps[lcv] == NULL) { /* top of "pps" while loop */ + + /* look for a current page */ + ptmp = uvm_pagelookup(uobj, current_offset); + + /* nope? allocate one now (if we can) */ + if (ptmp == NULL) { + + ptmp = uvm_pagealloc(uobj, current_offset, + NULL); /* alloc */ + + /* out of RAM? */ + if (ptmp == NULL) { + simple_unlock(&uobj->vmobjlock); + uvm_wait("kmgetwait1"); + simple_lock(&uobj->vmobjlock); + /* goto top of pps while loop */ + continue; + } + + /* + * got new page ready for I/O. break pps + * while loop. pps[lcv] is still NULL. + */ + break; + } + + /* page is there, see if we need to wait on it */ + if ((ptmp->flags & (PG_BUSY|PG_RELEASED)) != 0) { + ptmp->flags |= PG_WANTED; + UVM_UNLOCK_AND_WAIT(ptmp,&uobj->vmobjlock, 0, + "uvn_get",0); + simple_lock(&uobj->vmobjlock); + continue; /* goto top of pps while loop */ + } + + /* + * if we get here then the page has become resident + * and unbusy between steps 1 and 2. we busy it now + * (so we own it) and set pps[lcv] (so that we exit + * the while loop). caller must un-busy. + */ + ptmp->flags |= PG_BUSY; + UVM_PAGE_OWN(ptmp, "uvm_km_get2"); + pps[lcv] = ptmp; + } + + /* + * if we own the a valid page at the correct offset, pps[lcv] + * will point to it. nothing more to do except go to the + * next page. + */ + + if (pps[lcv]) + continue; /* next lcv */ + + /* + * we have a "fake/busy/clean" page that we just allocated. + * do the needed "i/o" (in this case that means zero it). + */ + + uvm_pagezero(ptmp); + ptmp->flags &= ~(PG_FAKE); + pps[lcv] = ptmp; + + } /* lcv loop */ + + /* + * finally, unlock object and return. + */ + + simple_unlock(&uobj->vmobjlock); + UVMHIST_LOG(maphist, "<- done (OK)",0,0,0,0); + return(VM_PAGER_OK); +} + +/* + * uvm_km_init: init kernel maps and objects to reflect reality (i.e. + * KVM already allocated for text, data, bss, and static data structures). + * + * => KVM is defined by VM_MIN_KERNEL_ADDRESS/VM_MAX_KERNEL_ADDRESS. + * we assume that [min -> start] has already been allocated and that + * "end" is the end. + */ + +void +uvm_km_init(start, end) + vaddr_t start, end; +{ + vaddr_t base = VM_MIN_KERNEL_ADDRESS; + + /* + * first, init kernel memory objects. 
+ */ + + /* kernel_object: for pageable anonymous kernel memory */ + uvm.kernel_object = uao_create(VM_MAX_KERNEL_ADDRESS - + VM_MIN_KERNEL_ADDRESS, UAO_FLAG_KERNOBJ); + + /* kmem_object: for malloc'd memory (wired, protected by splimp) */ + simple_lock_init(&kmem_object_store.vmobjlock); + kmem_object_store.pgops = &km_pager; + TAILQ_INIT(&kmem_object_store.memq); + kmem_object_store.uo_npages = 0; + /* we are special. we never die */ + kmem_object_store.uo_refs = UVM_OBJ_KERN; + uvmexp.kmem_object = &kmem_object_store; + + /* mb_object: for mbuf memory (always wired, protected by splimp) */ + simple_lock_init(&mb_object_store.vmobjlock); + mb_object_store.pgops = &km_pager; + TAILQ_INIT(&mb_object_store.memq); + mb_object_store.uo_npages = 0; + /* we are special. we never die */ + mb_object_store.uo_refs = UVM_OBJ_KERN; + uvmexp.mb_object = &mb_object_store; + + /* + * init the map and reserve allready allocated kernel space + * before installing. + */ + + uvm_map_setup(&kernel_map_store, base, end, FALSE); + kernel_map_store.pmap = pmap_kernel(); + if (uvm_map(&kernel_map_store, &base, start - base, NULL, + UVM_UNKNOWN_OFFSET, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, + UVM_INH_NONE, UVM_ADV_RANDOM,UVM_FLAG_FIXED)) != KERN_SUCCESS) + panic("uvm_km_init: could not reserve space for kernel"); + + /* + * install! + */ + + kernel_map = &kernel_map_store; +} + +/* + * uvm_km_suballoc: allocate a submap in the kernel map. once a submap + * is allocated all references to that area of VM must go through it. this + * allows the locking of VAs in kernel_map to be broken up into regions. + * + * => if `fixed' is true, *min specifies where the region described + * by the submap must start + * => if submap is non NULL we use that as the submap, otherwise we + * alloc a new map + */ +struct vm_map * +uvm_km_suballoc(map, min, max, size, pageable, fixed, submap) + struct vm_map *map; + vaddr_t *min, *max; /* OUT, OUT */ + vsize_t size; + boolean_t pageable; + boolean_t fixed; + struct vm_map *submap; +{ + int mapflags = UVM_FLAG_NOMERGE | (fixed ? UVM_FLAG_FIXED : 0); + + size = round_page(size); /* round up to pagesize */ + + /* + * first allocate a blank spot in the parent map + */ + + if (uvm_map(map, min, size, NULL, UVM_UNKNOWN_OFFSET, + UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE, + UVM_ADV_RANDOM, mapflags)) != KERN_SUCCESS) { + panic("uvm_km_suballoc: unable to allocate space in parent map"); + } + + /* + * set VM bounds (min is filled in by uvm_map) + */ + + *max = *min + size; + + /* + * add references to pmap and create or init the submap + */ + + pmap_reference(vm_map_pmap(map)); + if (submap == NULL) { + submap = uvm_map_create(vm_map_pmap(map), *min, *max, pageable); + if (submap == NULL) + panic("uvm_km_suballoc: unable to create submap"); + } else { + uvm_map_setup(submap, *min, *max, pageable); + submap->pmap = vm_map_pmap(map); + } + + /* + * now let uvm_map_submap plug in it... + */ + + if (uvm_map_submap(map, *min, *max, submap) != KERN_SUCCESS) + panic("uvm_km_suballoc: submap allocation failed"); + + return(submap); +} + +/* + * uvm_km_pgremove: remove pages from a kernel uvm_object. + * + * => when you unmap a part of anonymous kernel memory you want to toss + * the pages right away. (this gets called from uvm_unmap_...). 
+ */ + +#define UKM_HASH_PENALTY 4 /* a guess */ + +void +uvm_km_pgremove(uobj, start, end) + struct uvm_object *uobj; + vaddr_t start, end; +{ + boolean_t by_list, is_aobj; + struct vm_page *pp, *ppnext; + vaddr_t curoff; + UVMHIST_FUNC("uvm_km_pgremove"); UVMHIST_CALLED(maphist); + + simple_lock(&uobj->vmobjlock); /* lock object */ + + /* is uobj an aobj? */ + is_aobj = uobj->pgops == &aobj_pager; + + /* choose cheapest traversal */ + by_list = (uobj->uo_npages <= + ((end - start) >> PAGE_SHIFT) * UKM_HASH_PENALTY); + + if (by_list) + goto loop_by_list; + + /* by hash */ + + for (curoff = start ; curoff < end ; curoff += PAGE_SIZE) { + pp = uvm_pagelookup(uobj, curoff); + if (pp == NULL) + continue; + + UVMHIST_LOG(maphist," page 0x%x, busy=%d", pp, + pp->flags & PG_BUSY, 0, 0); + /* now do the actual work */ + if (pp->flags & PG_BUSY) + /* owner must check for this when done */ + pp->flags |= PG_RELEASED; + else { + pmap_page_protect(PMAP_PGARG(pp), VM_PROT_NONE); + + /* + * if this kernel object is an aobj, free the swap slot. + */ + if (is_aobj) { + int slot = uao_set_swslot(uobj, + curoff >> PAGE_SHIFT, + 0); + + if (slot) + uvm_swap_free(slot, 1); + } + + uvm_lock_pageq(); + uvm_pagefree(pp); + uvm_unlock_pageq(); + } + /* done */ + + } + simple_unlock(&uobj->vmobjlock); + return; + +loop_by_list: + + for (pp = uobj->memq.tqh_first ; pp != NULL ; pp = ppnext) { + + ppnext = pp->listq.tqe_next; + if (pp->offset < start || pp->offset >= end) { + continue; + } + + UVMHIST_LOG(maphist," page 0x%x, busy=%d", pp, + pp->flags & PG_BUSY, 0, 0); + /* now do the actual work */ + if (pp->flags & PG_BUSY) + /* owner must check for this when done */ + pp->flags |= PG_RELEASED; + else { + pmap_page_protect(PMAP_PGARG(pp), VM_PROT_NONE); + + /* + * if this kernel object is an aobj, free the swap slot. + */ + if (is_aobj) { + int slot = uao_set_swslot(uobj, + pp->offset >> PAGE_SHIFT, 0); + + if (slot) + uvm_swap_free(slot, 1); + } + + uvm_lock_pageq(); + uvm_pagefree(pp); + uvm_unlock_pageq(); + } + /* done */ + + } + simple_unlock(&uobj->vmobjlock); + return; +} + + +/* + * uvm_km_kmemalloc: lower level kernel memory allocator for malloc() + * + * => we map wired memory into the specified map using the obj passed in + * => NOTE: we can return NULL even if we can wait if there is not enough + * free VM space in the map... caller should be prepared to handle + * this case. 
+ * => we return KVA of memory allocated + * => flags: NOWAIT, VALLOC - just allocate VA, TRYLOCK - fail if we can't + * lock the map + */ + +vaddr_t +uvm_km_kmemalloc(map, obj, size, flags) + vm_map_t map; + struct uvm_object *obj; + vsize_t size; + int flags; +{ + vaddr_t kva, loopva; + vaddr_t offset; + struct vm_page *pg; + UVMHIST_FUNC("uvm_km_kmemalloc"); UVMHIST_CALLED(maphist); + + + UVMHIST_LOG(maphist," (map=0x%x, obj=0x%x, size=0x%x, flags=%d)", + map, obj, size, flags); +#ifdef DIAGNOSTIC + /* sanity check */ + if (vm_map_pmap(map) != pmap_kernel()) + panic("uvm_km_kmemalloc: invalid map"); +#endif + + /* + * setup for call + */ + + size = round_page(size); + kva = vm_map_min(map); /* hint */ + + /* + * allocate some virtual space + */ + + if (uvm_map(map, &kva, size, obj, UVM_UNKNOWN_OFFSET, + UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE, + UVM_ADV_RANDOM, (flags & UVM_KMF_TRYLOCK))) + != KERN_SUCCESS) { + UVMHIST_LOG(maphist, "<- done (no VM)",0,0,0,0); + return(0); + } + + /* + * if all we wanted was VA, return now + */ + + if (flags & UVM_KMF_VALLOC) { + UVMHIST_LOG(maphist,"<- done valloc (kva=0x%x)", kva,0,0,0); + return(kva); + } + /* + * recover object offset from virtual address + */ + + offset = kva - vm_map_min(kernel_map); + UVMHIST_LOG(maphist, " kva=0x%x, offset=0x%x", kva, offset,0,0); + + /* + * now allocate and map in the memory... note that we are the only ones + * whom should ever get a handle on this area of VM. + */ + + loopva = kva; + while (size) { + simple_lock(&obj->vmobjlock); + pg = uvm_pagealloc(obj, offset, NULL); + if (pg) { + pg->flags &= ~PG_BUSY; /* new page */ + UVM_PAGE_OWN(pg, NULL); + } + simple_unlock(&obj->vmobjlock); + + /* + * out of memory? + */ + + if (pg == NULL) { + if (flags & UVM_KMF_NOWAIT) { + /* free everything! */ + uvm_unmap(map, kva, kva + size); + return(0); + } else { + uvm_wait("km_getwait2"); /* sleep here */ + continue; + } + } + + /* + * map it in: note that we call pmap_enter with the map and + * object unlocked in case we are kmem_map/kmem_object + * (because if pmap_enter wants to allocate out of kmem_object + * it will need to lock it itself!) + */ +#if defined(PMAP_NEW) + pmap_kenter_pa(loopva, VM_PAGE_TO_PHYS(pg), VM_PROT_ALL); +#else + pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg), + UVM_PROT_ALL, TRUE); +#endif + loopva += PAGE_SIZE; + offset += PAGE_SIZE; + size -= PAGE_SIZE; + } + + UVMHIST_LOG(maphist,"<- done (kva=0x%x)", kva,0,0,0); + return(kva); +} + +/* + * uvm_km_free: free an area of kernel memory + */ + +void +uvm_km_free(map, addr, size) + vm_map_t map; + vaddr_t addr; + vsize_t size; +{ + + uvm_unmap(map, trunc_page(addr), round_page(addr+size)); +} + +/* + * uvm_km_free_wakeup: free an area of kernel memory and wake up + * anyone waiting for vm space. + * + * => XXX: "wanted" bit + unlock&wait on other end? + */ + +void +uvm_km_free_wakeup(map, addr, size) + vm_map_t map; + vaddr_t addr; + vsize_t size; +{ + vm_map_entry_t dead_entries; + + vm_map_lock(map); + (void)uvm_unmap_remove(map, trunc_page(addr), round_page(addr+size), + &dead_entries); + thread_wakeup(map); + vm_map_unlock(map); + + if (dead_entries != NULL) + uvm_unmap_detach(dead_entries, 0); +} + +/* + * uvm_km_alloc1: allocate wired down memory in the kernel map. 
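/*
 * illustrative sketch (editorial, not from this import): a typical
 * malloc()-style use of uvm_km_kmemalloc() on kmem_map, which must be
 * protected by splimp() as the overview at the top of this file notes
 * (compare uvm_km_alloc_poolpage1() further below).  "canwait" is an
 * illustrative flag for whether the caller may sleep.
 */
	vaddr_t va;
	int s;

	s = splimp();			/* kmem_map is used at interrupt time */
	va = uvm_km_kmemalloc(kmem_map, uvmexp.kmem_object, PAGE_SIZE,
	    canwait ? 0 : UVM_KMF_NOWAIT);
	splx(s);

	if (va == 0)
		return (NULL);		/* no VM space (or no memory w/ NOWAIT) */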
+ * + * => we can sleep if needed + */ + +vaddr_t +uvm_km_alloc1(map, size, zeroit) + vm_map_t map; + vsize_t size; + boolean_t zeroit; +{ + vaddr_t kva, loopva, offset; + struct vm_page *pg; + UVMHIST_FUNC("uvm_km_alloc1"); UVMHIST_CALLED(maphist); + + UVMHIST_LOG(maphist,"(map=0x%x, size=0x%x)", map, size,0,0); + +#ifdef DIAGNOSTIC + if (vm_map_pmap(map) != pmap_kernel()) + panic("uvm_km_alloc1"); +#endif + + size = round_page(size); + kva = vm_map_min(map); /* hint */ + + /* + * allocate some virtual space + */ + + if (uvm_map(map, &kva, size, uvm.kernel_object, UVM_UNKNOWN_OFFSET, + UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE, + UVM_ADV_RANDOM, 0)) != KERN_SUCCESS) { + UVMHIST_LOG(maphist,"<- done (no VM)",0,0,0,0); + return(0); + } + + /* + * recover object offset from virtual address + */ + + offset = kva - vm_map_min(kernel_map); + UVMHIST_LOG(maphist," kva=0x%x, offset=0x%x", kva, offset,0,0); + + /* + * now allocate the memory. we must be careful about released pages. + */ + + loopva = kva; + while (size) { + simple_lock(&uvm.kernel_object->vmobjlock); + pg = uvm_pagelookup(uvm.kernel_object, offset); + + /* + * if we found a page in an unallocated region, it must be + * released + */ + if (pg) { + if ((pg->flags & PG_RELEASED) == 0) + panic("uvm_km_alloc1: non-released page"); + pg->flags |= PG_WANTED; + UVM_UNLOCK_AND_WAIT(pg, &uvm.kernel_object->vmobjlock, + 0, "km_alloc", 0); + continue; /* retry */ + } + + /* allocate ram */ + pg = uvm_pagealloc(uvm.kernel_object, offset, NULL); + if (pg) { + pg->flags &= ~PG_BUSY; /* new page */ + UVM_PAGE_OWN(pg, NULL); + } + simple_unlock(&uvm.kernel_object->vmobjlock); + if (pg == NULL) { + uvm_wait("km_alloc1w"); /* wait for memory */ + continue; + } + + /* map it in */ +#if defined(PMAP_NEW) + pmap_kenter_pa(loopva, VM_PAGE_TO_PHYS(pg), UVM_PROT_ALL); +#else + pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg), + UVM_PROT_ALL, TRUE); +#endif + loopva += PAGE_SIZE; + offset += PAGE_SIZE; + size -= PAGE_SIZE; + } + + /* + * zero on request (note that "size" is now zero due to the above loop + * so we need to subtract kva from loopva to reconstruct the size). + */ + + if (zeroit) + bzero((caddr_t)kva, loopva - kva); + + UVMHIST_LOG(maphist,"<- done (kva=0x%x)", kva,0,0,0); + return(kva); +} + +/* + * uvm_km_valloc: allocate zero-fill memory in the kernel's address space + * + * => memory is not allocated until fault time + */ + +vaddr_t +uvm_km_valloc(map, size) + vm_map_t map; + vsize_t size; +{ + vaddr_t kva; + UVMHIST_FUNC("uvm_km_valloc"); UVMHIST_CALLED(maphist); + + UVMHIST_LOG(maphist, "(map=0x%x, size=0x%x)", map, size, 0,0); + +#ifdef DIAGNOSTIC + if (vm_map_pmap(map) != pmap_kernel()) + panic("uvm_km_valloc"); +#endif + + size = round_page(size); + kva = vm_map_min(map); /* hint */ + + /* + * allocate some virtual space. will be demand filled by kernel_object. 
+ */ + + if (uvm_map(map, &kva, size, uvm.kernel_object, UVM_UNKNOWN_OFFSET, + UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE, + UVM_ADV_RANDOM, 0)) != KERN_SUCCESS) { + UVMHIST_LOG(maphist, "<- done (no VM)", 0,0,0,0); + return(0); + } + + UVMHIST_LOG(maphist, "<- done (kva=0x%x)", kva,0,0,0); + return(kva); +} + +/* + * uvm_km_valloc_wait: allocate zero-fill memory in the kernel's address space + * + * => memory is not allocated until fault time + * => if no room in map, wait for space to free, unless requested size + * is larger than map (in which case we return 0) + */ + +vaddr_t +uvm_km_valloc_wait(map, size) + vm_map_t map; + vsize_t size; +{ + vaddr_t kva; + UVMHIST_FUNC("uvm_km_valloc_wait"); UVMHIST_CALLED(maphist); + + UVMHIST_LOG(maphist, "(map=0x%x, size=0x%x)", map, size, 0,0); + +#ifdef DIAGNOSTIC + if (vm_map_pmap(map) != pmap_kernel()) + panic("uvm_km_valloc_wait"); +#endif + + size = round_page(size); + if (size > vm_map_max(map) - vm_map_min(map)) + return(0); + + while (1) { + kva = vm_map_min(map); /* hint */ + + /* + * allocate some virtual space. will be demand filled + * by kernel_object. + */ + + if (uvm_map(map, &kva, size, uvm.kernel_object, + UVM_UNKNOWN_OFFSET, UVM_MAPFLAG(UVM_PROT_ALL, + UVM_PROT_ALL, UVM_INH_NONE, UVM_ADV_RANDOM, 0)) + == KERN_SUCCESS) { + UVMHIST_LOG(maphist,"<- done (kva=0x%x)", kva,0,0,0); + return(kva); + } + + /* + * failed. sleep for a while (on map) + */ + + UVMHIST_LOG(maphist,"<<<sleeping>>>",0,0,0,0); + tsleep((caddr_t)map, PVM, "vallocwait", 0); + } + /*NOTREACHED*/ +} + +/* Sanity; must specify both or none. */ +#if (defined(PMAP_MAP_POOLPAGE) || defined(PMAP_UNMAP_POOLPAGE)) && \ + (!defined(PMAP_MAP_POOLPAGE) || !defined(PMAP_UNMAP_POOLPAGE)) +#error Must specify MAP and UNMAP together. +#endif + +/* + * uvm_km_alloc_poolpage: allocate a page for the pool allocator + * + * => if the pmap specifies an alternate mapping method, we use it. + */ + +/* ARGSUSED */ +vaddr_t +uvm_km_alloc_poolpage1(map, obj, waitok) + vm_map_t map; + struct uvm_object *obj; + boolean_t waitok; +{ +#if defined(PMAP_MAP_POOLPAGE) + struct vm_page *pg; + vaddr_t va; + + again: + pg = uvm_pagealloc(NULL, 0, NULL); + if (pg == NULL) { + if (waitok) { + uvm_wait("plpg"); + goto again; + } else + return (0); + } + va = PMAP_MAP_POOLPAGE(VM_PAGE_TO_PHYS(pg)); + if (va == 0) + uvm_pagefree(pg); + return (va); +#else + vaddr_t va; + int s; + + /* + * NOTE: We may be called with a map that doens't require splimp + * protection (e.g. kernel_map). However, it does not hurt to + * go to splimp in this case (since unprocted maps will never be + * accessed in interrupt context). + * + * XXX We may want to consider changing the interface to this + * XXX function. + */ + + s = splimp(); + va = uvm_km_kmemalloc(map, obj, PAGE_SIZE, waitok ? 0 : UVM_KMF_NOWAIT); + splx(s); + return (va); +#endif /* PMAP_MAP_POOLPAGE */ +} + +/* + * uvm_km_free_poolpage: free a previously allocated pool page + * + * => if the pmap specifies an alternate unmapping method, we use it. + */ + +/* ARGSUSED */ +void +uvm_km_free_poolpage1(map, addr) + vm_map_t map; + vaddr_t addr; +{ +#if defined(PMAP_UNMAP_POOLPAGE) + paddr_t pa; + + pa = PMAP_UNMAP_POOLPAGE(addr); + uvm_pagefree(PHYS_TO_VM_PAGE(pa)); +#else + int s; + + /* + * NOTE: We may be called with a map that doens't require splimp + * protection (e.g. kernel_map). However, it does not hurt to + * go to splimp in this case (since unprocted maps will never be + * accessed in interrupt context). 
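[Illustrative annotation, not part of the imported source.] Worth spelling out: uvm_km_valloc_wait() above tsleep()s on the map itself when the submap is full, and uvm_km_free_wakeup() earlier in this file is the matching release that issues the thread_wakeup() on that map. A hedged pairing sketch, where "example_submap" and "npages" are hypothetical:

	vaddr_t va;
	vsize_t len = npages << PAGE_SHIFT;

	va = uvm_km_valloc_wait(example_submap, len);	/* may tsleep on the map */
	if (va == 0)
		return;		/* request larger than the whole submap */
	/* ... use the zero-fill, demand-paged range ... */
	uvm_km_free_wakeup(example_submap, va, len);	/* wakes any sleeper on the map */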
+ * + * XXX We may want to consider changing the interface to this + * XXX function. + */ + + s = splimp(); + uvm_km_free(map, addr, PAGE_SIZE); + splx(s); +#endif /* PMAP_UNMAP_POOLPAGE */ +} diff --git a/sys/uvm/uvm_km.h b/sys/uvm/uvm_km.h new file mode 100644 index 00000000000..ba941255020 --- /dev/null +++ b/sys/uvm/uvm_km.h @@ -0,0 +1,55 @@ +/* $NetBSD: uvm_km.h,v 1.6 1998/08/13 02:11:01 eeh Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * from: Id: uvm_km.h,v 1.1.2.2 1997/12/30 12:03:15 mrg Exp + */ + +#ifndef _UVM_UVM_KM_H_ +#define _UVM_UVM_KM_H_ + +/* + * uvm_km.h + */ + +/* + * prototypes + */ + +void uvm_km_init __P((vaddr_t, vaddr_t)); +void uvm_km_pgremove __P((struct uvm_object *, vaddr_t, vaddr_t)); + +#endif /* _UVM_UVM_KM_H_ */ diff --git a/sys/uvm/uvm_loan.c b/sys/uvm/uvm_loan.c new file mode 100644 index 00000000000..d8716b46f52 --- /dev/null +++ b/sys/uvm/uvm_loan.c @@ -0,0 +1,755 @@ +/* $NetBSD: uvm_loan.c,v 1.13 1999/01/24 23:53:15 chuck Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * from: Id: uvm_loan.c,v 1.1.6.4 1998/02/06 05:08:43 chs Exp + */ + +/* + * uvm_loan.c: page loanout handler + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/proc.h> +#include <sys/malloc.h> +#include <sys/mman.h> + +#include <vm/vm.h> +#include <vm/vm_page.h> +#include <vm/vm_kern.h> + +#include <uvm/uvm.h> + +/* + * "loaned" pages are pages which are (read-only, copy-on-write) loaned + * from the VM system to other parts of the kernel. this allows page + * copying to be avoided (e.g. you can loan pages from objs/anons to + * the mbuf system). + * + * there are 3 types of loans possible: + * O->K uvm_object page to wired kernel page (e.g. mbuf data area) + * A->K anon page to kernel wired kernel page (e.g. mbuf data area) + * O->A uvm_object to anon loan (e.g. vnode page to an anon) + * note that it possible to have an O page loaned to both an A and K + * at the same time. + * + * loans are tracked by pg->loan_count. an O->A page will have both + * a uvm_object and a vm_anon, but PQ_ANON will not be set. this sort + * of page is considered "owned" by the uvm_object (not the anon). + * + * each loan of a page to a wired kernel page bumps the pg->wire_count. + * wired kernel mappings should be entered with pmap_kenter functions + * so that pmap_page_protect() will not affect the kernel mappings. + * (this requires the PMAP_NEW interface...). + * + * owners that want to free their pages and discover that they are + * loaned out simply "disown" them (the page becomes an orphan). these + * pages should be freed when the last loan is dropped. in some cases + * an anon may "adopt" an orphaned page. + * + * locking: to read pg->loan_count either the owner or the page queues + * must be locked. to modify pg->loan_count, both the owner of the page + * and the PQs must be locked. pg->flags is (as always) locked by + * the owner of the page. + * + * note that locking from the "loaned" side is tricky since the object + * getting the loaned page has no reference to the page's owner and thus + * the owner could "die" at any time. in order to prevent the owner + * from dying the page queues should be locked. this forces us to sometimes + * use "try" locking. + * + * loans are typically broken by the following events: + * 1. write fault to a loaned page + * 2. pageout of clean+inactive O->A loaned page + * 3. owner frees page (e.g. 
pager flush) + * + * note that loaning a page causes all mappings of the page to become + * read-only (via pmap_page_protect). this could have an unexpected + * effect on normal "wired" pages if one is not careful. + */ + +/* + * local prototypes + */ + +static int uvm_loananon __P((struct uvm_faultinfo *, void ***, + int, struct vm_anon *)); +static int uvm_loanentry __P((struct uvm_faultinfo *, void ***, int)); +static int uvm_loanuobj __P((struct uvm_faultinfo *, void ***, + int, vaddr_t)); +static int uvm_loanzero __P((struct uvm_faultinfo *, void ***, int)); + +/* + * inlines + */ + +/* + * uvm_loanentry: loan out pages in a map entry (helper fn for uvm_loan()) + * + * => "ufi" is the result of a successful map lookup (meaning that + * the maps are locked by the caller) + * => we may unlock the maps if needed (for I/O) + * => we put our output result in "output" + * => we return the number of pages we loaned, or -1 if we had an error + */ + +static __inline int +uvm_loanentry(ufi, output, flags) + struct uvm_faultinfo *ufi; + void ***output; + int flags; +{ + vaddr_t curaddr = ufi->orig_rvaddr; + vsize_t togo = ufi->size; + struct vm_aref *aref = &ufi->entry->aref; + struct uvm_object *uobj = ufi->entry->object.uvm_obj; + struct vm_anon *anon; + int rv, result = 0; + + /* + * lock us the rest of the way down + */ + if (aref->ar_amap) + amap_lock(aref->ar_amap); + if (uobj) + simple_lock(&uobj->vmobjlock); + + /* + * loop until done + */ + while (togo) { + + /* + * find the page we want. check the anon layer first. + */ + + if (aref->ar_amap) { + anon = amap_lookup(aref, curaddr - ufi->entry->start); + } else { + anon = NULL; + } + + if (anon) { + rv = uvm_loananon(ufi, output, flags, anon); + } else if (uobj) { + rv = uvm_loanuobj(ufi, output, flags, curaddr); + } else if (UVM_ET_ISCOPYONWRITE(ufi->entry)) { + rv = uvm_loanzero(ufi, output, flags); + } else { + rv = -1; /* null map entry... fail now */ + } + + /* total failure */ + if (rv < 0) + return(-1); + + /* relock failed, need to do another lookup */ + if (rv == 0) + return(result); + + /* + * got it... advance to next page + */ + result++; + togo -= PAGE_SIZE; + curaddr += PAGE_SIZE; + } + + /* + * unlock everything and return + */ + uvmfault_unlockall(ufi, aref->ar_amap, uobj, NULL); + return(result); +} + +/* + * normal functions + */ + +/* + * uvm_loan: loan pages out to anons or to the kernel + * + * => map should be unlocked + * => start and len should be multiples of PAGE_SIZE + * => result is either an array of anon's or vm_pages (depending on flags) + * => flag values: UVM_LOAN_TOANON - loan to anons + * UVM_LOAN_TOPAGE - loan to wired kernel page + * one and only one of these flags must be set! + */ + +int +uvm_loan(map, start, len, result, flags) + struct vm_map *map; + vaddr_t start; + vsize_t len; + void **result; + int flags; +{ + struct uvm_faultinfo ufi; + void **output; + int rv; + + /* + * ensure that one and only one of the flags is set + */ + + if ((flags & (UVM_LOAN_TOANON|UVM_LOAN_TOPAGE)) == + (UVM_LOAN_TOANON|UVM_LOAN_TOPAGE) || + (flags & (UVM_LOAN_TOANON|UVM_LOAN_TOPAGE)) == 0) + return(KERN_FAILURE); + + /* + * "output" is a pointer to the current place to put the loaned + * page... + */ + + output = &result[0]; /* start at the beginning ... 
*/ + + /* + * while we've got pages to do + */ + + while (len > 0) { + + /* + * fill in params for a call to uvmfault_lookup + */ + + ufi.orig_map = map; + ufi.orig_rvaddr = start; + ufi.orig_size = len; + + /* + * do the lookup, the only time this will fail is if we hit on + * an unmapped region (an error) + */ + + if (!uvmfault_lookup(&ufi, FALSE)) + goto fail; + + /* + * now do the loanout + */ + rv = uvm_loanentry(&ufi, &output, flags); + if (rv < 0) + goto fail; + + /* + * done! advance pointers and unlock. + */ + rv <<= PAGE_SHIFT; + len -= rv; + start += rv; + uvmfault_unlockmaps(&ufi, FALSE); + } + + /* + * got it! return success. + */ + + return(KERN_SUCCESS); + +fail: + /* + * fail: failed to do it. drop our loans and return failure code. + */ + if (output - result) { + if (flags & UVM_LOAN_TOANON) + uvm_unloananon((struct vm_anon **)result, + output - result); + else + uvm_unloanpage((struct vm_page **)result, + output - result); + } + return(KERN_FAILURE); +} + +/* + * uvm_loananon: loan a page from an anon out + * + * => return value: + * -1 = fatal error, everything is unlocked, abort. + * 0 = lookup in ufi went stale, everything unlocked, relookup and + * try again + * 1 = got it, everything still locked + */ + +int +uvm_loananon(ufi, output, flags, anon) + struct uvm_faultinfo *ufi; + void ***output; + int flags; + struct vm_anon *anon; +{ + struct vm_page *pg; + int result; + + /* + * if we are loaning to another anon then it is easy, we just + * bump the reference count on the current anon and return a + * pointer to it. + */ + if (flags & UVM_LOAN_TOANON) { + simple_lock(&anon->an_lock); + pg = anon->u.an_page; + if (pg && (pg->pqflags & PQ_ANON) != 0 && anon->an_ref == 1) + /* read protect it */ + pmap_page_protect(PMAP_PGARG(pg), VM_PROT_READ); + anon->an_ref++; + **output = anon; + *output = (*output) + 1; + simple_unlock(&anon->an_lock); + return(1); + } + + /* + * we are loaning to a kernel-page. we need to get the page + * resident so we can wire it. uvmfault_anonget will handle + * this for us. + */ + + simple_lock(&anon->an_lock); + result = uvmfault_anonget(ufi, ufi->entry->aref.ar_amap, anon); + + /* + * if we were unable to get the anon, then uvmfault_anonget has + * unlocked everything and returned an error code. + */ + + if (result != VM_PAGER_OK) { + + /* need to refault (i.e. refresh our lookup) ? */ + if (result == VM_PAGER_REFAULT) + return(0); + + /* "try again"? sleep a bit and retry ... */ + if (result == VM_PAGER_AGAIN) { + tsleep((caddr_t)&lbolt, PVM, "loanagain", 0); + return(0); + } + + /* otherwise flag it as an error */ + return(-1); + } + + /* + * we have the page and its owner locked: do the loan now. + */ + + pg = anon->u.an_page; + uvm_lock_pageq(); + if (pg->loan_count == 0) + pmap_page_protect(PMAP_PGARG(pg), VM_PROT_READ); + pg->loan_count++; + uvm_pagewire(pg); /* always wire it */ + uvm_unlock_pageq(); + **output = pg; + *output = (*output) + 1; + + /* unlock anon and return success */ + if (pg->uobject) + simple_unlock(&pg->uobject->vmobjlock); + simple_unlock(&anon->an_lock); + return(1); +} + +/* + * uvm_loanuobj: loan a page from a uobj out + * + * => return value: + * -1 = fatal error, everything is unlocked, abort. 
+ * 0 = lookup in ufi went stale, everything unlocked, relookup and + * try again + * 1 = got it, everything still locked + */ + +int +uvm_loanuobj(ufi, output, flags, va) + struct uvm_faultinfo *ufi; + void ***output; + int flags; + vaddr_t va; +{ + struct vm_amap *amap = ufi->entry->aref.ar_amap; + struct uvm_object *uobj = ufi->entry->object.uvm_obj; + struct vm_page *pg; + struct vm_anon *anon; + int result, npages; + boolean_t locked; + + /* + * first we must make sure the page is resident. + * + * XXXCDC: duplicate code with uvm_fault(). + */ + + if (uobj->pgops->pgo_get) { + npages = 1; + pg = NULL; + result = uobj->pgops->pgo_get(uobj, va - ufi->entry->start, + &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_LOCKED); + } else { + result = VM_PAGER_ERROR; + } + + /* + * check the result of the locked pgo_get. if there is a problem, + * then we fail the loan. + */ + + if (result != VM_PAGER_OK && result != VM_PAGER_UNLOCK) { + uvmfault_unlockall(ufi, amap, uobj, NULL); + return(-1); + } + + /* + * if we need to unlock for I/O, do so now. + */ + + if (result == VM_PAGER_UNLOCK) { + uvmfault_unlockall(ufi, amap, NULL, NULL); + + npages = 1; + /* locked: uobj */ + result = uobj->pgops->pgo_get(uobj, va - ufi->entry->start, + &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, 0); + /* locked: <nothing> */ + + /* + * check for errors + */ + + if (result != VM_PAGER_OK) { + if (result == VM_PAGER_AGAIN) { + tsleep((caddr_t)&lbolt, PVM, "fltagain2", 0); + return(0); /* redo the lookup and try again */ + } + return(-1); /* total failure */ + } + + /* + * pgo_get was a success. attempt to relock everything. + */ + + locked = uvmfault_relock(ufi); + if (locked && amap) + amap_lock(amap); + simple_lock(&uobj->vmobjlock); + + /* + * verify that the page has not be released and re-verify + * that amap slot is still free. if there is a problem we + * drop our lock (thus force a lookup refresh/retry). + */ + + if ((pg->flags & PG_RELEASED) != 0 || + (locked && amap && amap_lookup(&ufi->entry->aref, + ufi->orig_rvaddr - ufi->entry->start))) { + + if (locked) + uvmfault_unlockall(ufi, amap, NULL, NULL); + locked = FALSE; + } + + /* + * didn't get the lock? release the page and retry. + */ + + if (locked == FALSE) { + + if (pg->flags & PG_WANTED) + /* still holding object lock */ + thread_wakeup(pg); + + if (pg->flags & PG_RELEASED) { +#ifdef DIAGNOSTIC + if (uobj->pgops->pgo_releasepg == NULL) + panic("uvm_loanuobj: object has no releasepg function"); +#endif + /* frees page */ + if (uobj->pgops->pgo_releasepg(pg, NULL)) + simple_unlock(&uobj->vmobjlock); + return (0); + } + + uvm_lock_pageq(); + uvm_pageactivate(pg); /* make sure it is in queues */ + uvm_unlock_pageq(); + pg->flags &= ~(PG_BUSY|PG_WANTED); + UVM_PAGE_OWN(pg, NULL); + simple_unlock(&uobj->vmobjlock); + return (0); + } + } + + /* + * at this point we have the page we want ("pg") marked PG_BUSY for us + * and we have all data structures locked. do the loanout. page can + * not be PG_RELEASED (we caught this above). + */ + + if ((flags & UVM_LOAN_TOANON) == 0) { /* loan to wired-kernel page? */ + uvm_lock_pageq(); + if (pg->loan_count == 0) + pmap_page_protect(PMAP_PGARG(pg), VM_PROT_READ); + pg->loan_count++; + uvm_pagewire(pg); + uvm_unlock_pageq(); + **output = pg; + *output = (*output) + 1; + if (pg->flags & PG_WANTED) + thread_wakeup(pg); + pg->flags &= ~(PG_WANTED|PG_BUSY); + UVM_PAGE_OWN(pg, NULL); + return(1); /* got it! */ + } + + /* + * must be a loan to an anon. check to see if there is already + * an anon associated with this page. 
if so, then just return + * a reference to this object. the page should already be + * mapped read-only because it is already on loan. + */ + + if (pg->uanon) { + anon = pg->uanon; + simple_lock(&anon->an_lock); + anon->an_ref++; + simple_unlock(&anon->an_lock); + **output = anon; + *output = (*output) + 1; + uvm_lock_pageq(); + uvm_pageactivate(pg); /* reactivate */ + uvm_unlock_pageq(); + if (pg->flags & PG_WANTED) + thread_wakeup(pg); + pg->flags &= ~(PG_WANTED|PG_BUSY); + UVM_PAGE_OWN(pg, NULL); + return(1); + } + + /* + * need to allocate a new anon + */ + + anon = uvm_analloc(); + if (anon == NULL) { /* out of VM! */ + if (pg->flags & PG_WANTED) + thread_wakeup(pg); + pg->flags &= ~(PG_WANTED|PG_BUSY); + UVM_PAGE_OWN(pg, NULL); + uvmfault_unlockall(ufi, amap, uobj, NULL); + return(-1); + } + anon->u.an_page = pg; + pg->uanon = anon; + uvm_lock_pageq(); + if (pg->loan_count == 0) + pmap_page_protect(PMAP_PGARG(pg), VM_PROT_READ); + pg->loan_count++; + uvm_pageactivate(pg); + uvm_unlock_pageq(); + **output = anon; + *output = (*output) + 1; + if (pg->flags & PG_WANTED) + thread_wakeup(pg); + pg->flags &= ~(PG_WANTED|PG_BUSY); + UVM_PAGE_OWN(pg, NULL); + return(1); +} + +/* + * uvm_loanzero: "loan" a zero-fill page out + * + * => return value: + * -1 = fatal error, everything is unlocked, abort. + * 0 = lookup in ufi went stale, everything unlocked, relookup and + * try again + * 1 = got it, everything still locked + */ + +int +uvm_loanzero(ufi, output, flags) + struct uvm_faultinfo *ufi; + void ***output; + int flags; +{ + struct vm_anon *anon; + struct vm_page *pg; + + if ((flags & UVM_LOAN_TOANON) == 0) { /* loaning to kernel-page */ + + while ((pg = uvm_pagealloc(NULL, 0, NULL)) == NULL) { + uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, + ufi->entry->object.uvm_obj, NULL); + uvm_wait("loanzero1"); + if (!uvmfault_relock(ufi)) + return(0); + if (ufi->entry->aref.ar_amap) + amap_lock(ufi->entry->aref.ar_amap); + if (ufi->entry->object.uvm_obj) + simple_lock( + &ufi->entry->object.uvm_obj->vmobjlock); + /* ... and try again */ + } + + /* got a page, zero it and return */ + uvm_pagezero(pg); /* clears PG_CLEAN */ + pg->flags &= ~(PG_BUSY|PG_FAKE); + UVM_PAGE_OWN(pg, NULL); + **output = pg; + *output = (*output) + 1; + uvm_lock_pageq(); + /* wire it as we are loaning to kernel-page */ + uvm_pagewire(pg); + pg->loan_count = 1; + uvm_unlock_pageq(); + return(1); + } + + /* loaning to an anon */ + while ((anon = uvm_analloc()) == NULL || + (pg = uvm_pagealloc(NULL, 0, anon)) == NULL) { + + /* unlock everything */ + uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, + ufi->entry->object.uvm_obj, NULL); + + /* out of swap causes us to fail */ + if (anon == NULL) + return(-1); + + uvm_anfree(anon); + uvm_wait("loanzero2"); /* wait for pagedaemon */ + + if (!uvmfault_relock(ufi)) + /* map changed while unlocked, need relookup */ + return (0); + + /* relock everything else */ + if (ufi->entry->aref.ar_amap) + amap_lock(ufi->entry->aref.ar_amap); + if (ufi->entry->object.uvm_obj) + simple_lock(&ufi->entry->object.uvm_obj->vmobjlock); + /* ... 
and try again */ + } + + /* got a page, zero it and return */ + uvm_pagezero(pg); /* clears PG_CLEAN */ + pg->flags &= ~(PG_BUSY|PG_FAKE); + UVM_PAGE_OWN(pg, NULL); + uvm_lock_pageq(); + uvm_pageactivate(pg); + uvm_unlock_pageq(); + **output = anon; + *output = (*output) + 1; + return(1); +} + + +/* + * uvm_unloananon: kill loans on anons (basically a normal ref drop) + * + * => we expect all our resources to be unlocked + */ + +void +uvm_unloananon(aloans, nanons) + struct vm_anon **aloans; + int nanons; +{ + struct vm_anon *anon; + + while (nanons-- > 0) { + int refs; + + anon = *aloans++; + simple_lock(&anon->an_lock); + refs = --anon->an_ref; + simple_unlock(&anon->an_lock); + + if (refs == 0) { + uvm_anfree(anon); /* last reference: kill anon */ + } + } +} + +/* + * uvm_unloanpage: kill loans on pages loaned out to the kernel + * + * => we expect all our resources to be unlocked + */ + +void +uvm_unloanpage(ploans, npages) + struct vm_page **ploans; + int npages; +{ + struct vm_page *pg; + + uvm_lock_pageq(); + + while (npages-- > 0) { + pg = *ploans++; + + if (pg->loan_count < 1) + panic("uvm_unloanpage: page %p isn't loaned", pg); + + pg->loan_count--; /* drop loan */ + uvm_pageunwire(pg); /* and wire */ + + /* + * if page is unowned and we killed last loan, then we can + * free it + */ + if (pg->loan_count == 0 && pg->uobject == NULL && + pg->uanon == NULL) { + + if (pg->flags & PG_BUSY) + panic("uvm_unloanpage: page %p unowned but PG_BUSY!", pg); + + /* be safe */ + pmap_page_protect(PMAP_PGARG(pg), VM_PROT_NONE); + uvm_pagefree(pg); /* pageq locked above */ + + } + } + + uvm_unlock_pageq(); +} + diff --git a/sys/uvm/uvm_loan.h b/sys/uvm/uvm_loan.h new file mode 100644 index 00000000000..af99b357cf5 --- /dev/null +++ b/sys/uvm/uvm_loan.h @@ -0,0 +1,59 @@ +/* $NetBSD: uvm_loan.h,v 1.5 1998/08/13 02:11:01 eeh Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * from: Id: uvm_loan.h,v 1.1.4.1 1997/12/08 16:07:14 chuck Exp + */ + +#ifndef _UVM_UVM_LOAN_H_ +#define _UVM_UVM_LOAN_H_ + +/* + * flags for uvm_loan + */ + +#define UVM_LOAN_TOANON 0x1 /* loan to anon */ +#define UVM_LOAN_TOPAGE 0x2 /* loan to page */ + +/* + * loan prototypes + */ + +int uvm_loan __P((struct vm_map *, vaddr_t, vsize_t, void **, int)); +void uvm_unloananon __P((struct vm_anon **, int)); +void uvm_unloanpage __P((struct vm_page **, int)); + +#endif /* _UVM_UVM_LOAN_H_ */ diff --git a/sys/uvm/uvm_map.c b/sys/uvm/uvm_map.c new file mode 100644 index 00000000000..a5b337db99d --- /dev/null +++ b/sys/uvm/uvm_map.c @@ -0,0 +1,2972 @@ +/* $NetBSD: uvm_map.c,v 1.34 1999/01/24 23:53:15 chuck Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * Copyright (c) 1991, 1993, The Regents of the University of California. + * + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor, + * Washington University, the University of California, Berkeley and + * its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)vm_map.c 8.3 (Berkeley) 1/12/94 + * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp + * + * + * Copyright (c) 1987, 1990 Carnegie-Mellon University. + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and + * its documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie the + * rights to redistribute these changes. + */ + +/* + * uvm_map.c: uvm map operations + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/mman.h> +#include <sys/proc.h> +#include <sys/malloc.h> +#include <sys/pool.h> + +#include <sys/user.h> +#include <machine/pcb.h> + +#ifdef SYSVSHM +#include <sys/shm.h> +#endif + +#include <vm/vm.h> +#include <vm/vm_page.h> +#include <vm/vm_kern.h> + +#define UVM_MAP +#include <uvm/uvm.h> + +#ifdef DDB +#include <uvm/uvm_ddb.h> +#endif + + +struct uvm_cnt uvm_map_call, map_backmerge, map_forwmerge; +struct uvm_cnt uvm_mlk_call, uvm_mlk_hint; + +/* + * pool for vmspace structures. + */ + +struct pool uvm_vmspace_pool; + +/* + * pool for dynamically-allocated map entries. + */ + +struct pool uvm_map_entry_pool; + +/* + * macros + */ + +/* + * uvm_map_entry_link: insert entry into a map + * + * => map must be locked + */ +#define uvm_map_entry_link(map, after_where, entry) do { \ + (map)->nentries++; \ + (entry)->prev = (after_where); \ + (entry)->next = (after_where)->next; \ + (entry)->prev->next = (entry); \ + (entry)->next->prev = (entry); \ +} while (0) + +/* + * uvm_map_entry_unlink: remove entry from a map + * + * => map must be locked + */ +#define uvm_map_entry_unlink(map, entry) do { \ + (map)->nentries--; \ + (entry)->next->prev = (entry)->prev; \ + (entry)->prev->next = (entry)->next; \ +} while (0) + +/* + * SAVE_HINT: saves the specified entry as the hint for future lookups. + * + * => map need not be locked (protected by hint_lock). 
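[Illustrative annotation, not part of the imported source.] The link/unlink macros above maintain a circular doubly-linked list whose sentinel is &map->header; the lookup and unmap code later in this file walks entries the same way. A minimal traversal sketch, assuming the map is at least read-locked:

	vm_map_entry_t ent;

	for (ent = map->header.next; ent != &map->header; ent = ent->next)
		printf("entry 0x%lx-0x%lx\n", ent->start, ent->end);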
+ */ +#define SAVE_HINT(map,value) do { \ + simple_lock(&(map)->hint_lock); \ + (map)->hint = (value); \ + simple_unlock(&(map)->hint_lock); \ +} while (0) + +/* + * VM_MAP_RANGE_CHECK: check and correct range + * + * => map must at least be read locked + */ + +#define VM_MAP_RANGE_CHECK(map, start, end) do { \ + if (start < vm_map_min(map)) \ + start = vm_map_min(map); \ + if (end > vm_map_max(map)) \ + end = vm_map_max(map); \ + if (start > end) \ + start = end; \ +} while (0) + +/* + * local prototypes + */ + +static vm_map_entry_t uvm_mapent_alloc __P((vm_map_t)); +static void uvm_mapent_copy __P((vm_map_entry_t,vm_map_entry_t)); +static void uvm_mapent_free __P((vm_map_entry_t)); +static void uvm_map_entry_unwire __P((vm_map_t, vm_map_entry_t)); + +/* + * local inlines + */ + +#undef UVM_MAP_INLINES + +#ifdef UVM_MAP_INLINES +#define UVM_INLINE __inline +#else +#define UVM_INLINE +#endif + +/* + * uvm_mapent_alloc: allocate a map entry + * + * => XXX: static pool for kernel map? + */ + +static UVM_INLINE vm_map_entry_t +uvm_mapent_alloc(map) + vm_map_t map; +{ + vm_map_entry_t me; + int s; + UVMHIST_FUNC("uvm_mapent_alloc"); + UVMHIST_CALLED(maphist); + + if (map->entries_pageable) { + me = pool_get(&uvm_map_entry_pool, PR_WAITOK); + me->flags = 0; + /* me can't be null, wait ok */ + + } else { + s = splimp(); /* protect kentry_free list with splimp */ + simple_lock(&uvm.kentry_lock); + me = uvm.kentry_free; + if (me) uvm.kentry_free = me->next; + simple_unlock(&uvm.kentry_lock); + splx(s); + if (!me) + panic("mapent_alloc: out of kernel map entries, check MAX_KMAPENT"); + me->flags = UVM_MAP_STATIC; + } + + UVMHIST_LOG(maphist, "<- new entry=0x%x [pageable=%d]", + me, map->entries_pageable, 0, 0); + return(me); + +} + +/* + * uvm_mapent_free: free map entry + * + * => XXX: static pool for kernel map? + */ + +static UVM_INLINE void +uvm_mapent_free(me) + vm_map_entry_t me; +{ + int s; + UVMHIST_FUNC("uvm_mapent_free"); + UVMHIST_CALLED(maphist); + UVMHIST_LOG(maphist,"<- freeing map entry=0x%x [flags=%d]", + me, me->flags, 0, 0); + if ((me->flags & UVM_MAP_STATIC) == 0) { + pool_put(&uvm_map_entry_pool, me); + } else { + s = splimp(); /* protect kentry_free list with splimp */ + simple_lock(&uvm.kentry_lock); + me->next = uvm.kentry_free; + uvm.kentry_free = me; + simple_unlock(&uvm.kentry_lock); + splx(s); + } +} + +/* + * uvm_mapent_copy: copy a map entry, preserving flags + */ + +static UVM_INLINE void +uvm_mapent_copy(src, dst) + vm_map_entry_t src; + vm_map_entry_t dst; +{ + + bcopy(src, dst, ((char *)&src->uvm_map_entry_stop_copy) - ((char*)src)); +} + +/* + * uvm_map_entry_unwire: unwire a map entry + * + * => map should be locked by caller + */ + +static UVM_INLINE void +uvm_map_entry_unwire(map, entry) + vm_map_t map; + vm_map_entry_t entry; +{ + + uvm_fault_unwire(map->pmap, entry->start, entry->end); + entry->wired_count = 0; +} + +/* + * uvm_map_init: init mapping system at boot time. note that we allocate + * and init the static pool of vm_map_entry_t's for the kernel here. + */ + +void +uvm_map_init() +{ + static struct vm_map_entry kernel_map_entry[MAX_KMAPENT]; +#if defined(UVMHIST) + static struct uvm_history_ent maphistbuf[100]; + static struct uvm_history_ent pdhistbuf[100]; +#endif + int lcv; + + /* + * first, init logging system. 
+ */ + + UVMHIST_FUNC("uvm_map_init"); + UVMHIST_INIT_STATIC(maphist, maphistbuf); + UVMHIST_INIT_STATIC(pdhist, pdhistbuf); + UVMHIST_CALLED(maphist); + UVMHIST_LOG(maphist,"<starting uvm map system>", 0, 0, 0, 0); + UVMCNT_INIT(uvm_map_call, UVMCNT_CNT, 0, + "# uvm_map() successful calls", 0); + UVMCNT_INIT(map_backmerge, UVMCNT_CNT, 0, "# uvm_map() back merges", 0); + UVMCNT_INIT(map_forwmerge, UVMCNT_CNT, 0, "# uvm_map() missed forward", + 0); + UVMCNT_INIT(uvm_mlk_call, UVMCNT_CNT, 0, "# map lookup calls", 0); + UVMCNT_INIT(uvm_mlk_hint, UVMCNT_CNT, 0, "# map lookup hint hits", 0); + + /* + * now set up static pool of kernel map entrys ... + */ + + simple_lock_init(&uvm.kentry_lock); + uvm.kentry_free = NULL; + for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) { + kernel_map_entry[lcv].next = uvm.kentry_free; + uvm.kentry_free = &kernel_map_entry[lcv]; + } + + /* + * initialize the map-related pools. + */ + pool_init(&uvm_vmspace_pool, sizeof(struct vmspace), + 0, 0, 0, "vmsppl", 0, + pool_page_alloc_nointr, pool_page_free_nointr, M_VMMAP); + pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry), + 0, 0, 0, "vmmpepl", 0, + pool_page_alloc_nointr, pool_page_free_nointr, M_VMMAP); +} + +/* + * clippers + */ + +/* + * uvm_map_clip_start: ensure that the entry begins at or after + * the starting address, if it doesn't we split the entry. + * + * => caller should use UVM_MAP_CLIP_START macro rather than calling + * this directly + * => map must be locked by caller + */ + +void uvm_map_clip_start(map, entry, start) + vm_map_t map; + vm_map_entry_t entry; + vaddr_t start; +{ + vm_map_entry_t new_entry; + vaddr_t new_adj; + + /* uvm_map_simplify_entry(map, entry); */ /* XXX */ + + /* + * Split off the front portion. note that we must insert the new + * entry BEFORE this one, so that this entry has the specified + * starting address. + */ + + new_entry = uvm_mapent_alloc(map); + uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */ + + new_entry->end = start; + new_adj = start - new_entry->start; + if (entry->object.uvm_obj) + entry->offset += new_adj; /* shift start over */ + entry->start = start; + + if (new_entry->aref.ar_amap) { + amap_splitref(&new_entry->aref, &entry->aref, new_adj); + } + + uvm_map_entry_link(map, entry->prev, new_entry); + + if (UVM_ET_ISSUBMAP(entry)) { + /* ... unlikely to happen, but play it safe */ + uvm_map_reference(new_entry->object.sub_map); + } else { + if (UVM_ET_ISOBJ(entry) && + entry->object.uvm_obj->pgops && + entry->object.uvm_obj->pgops->pgo_reference) + entry->object.uvm_obj->pgops->pgo_reference( + entry->object.uvm_obj); + } +} + +/* + * uvm_map_clip_end: ensure that the entry ends at or before + * the ending address, if it does't we split the reference + * + * => caller should use UVM_MAP_CLIP_END macro rather than calling + * this directly + * => map must be locked by caller + */ + +void +uvm_map_clip_end(map, entry, end) + vm_map_t map; + vm_map_entry_t entry; + vaddr_t end; +{ + vm_map_entry_t new_entry; + vaddr_t new_adj; /* #bytes we move start forward */ + + /* + * Create a new entry and insert it + * AFTER the specified entry + */ + + new_entry = uvm_mapent_alloc(map); + uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */ + + new_entry->start = entry->end = end; + new_adj = end - entry->start; + if (new_entry->object.uvm_obj) + new_entry->offset += new_adj; + + if (entry->aref.ar_amap) + amap_splitref(&entry->aref, &new_entry->aref, new_adj); + + uvm_map_entry_link(map, entry, new_entry); + + if (UVM_ET_ISSUBMAP(entry)) { + /* ... 
unlikely to happen, but play it safe */ + uvm_map_reference(new_entry->object.sub_map); + } else { + if (UVM_ET_ISOBJ(entry) && + entry->object.uvm_obj->pgops && + entry->object.uvm_obj->pgops->pgo_reference) + entry->object.uvm_obj->pgops->pgo_reference( + entry->object.uvm_obj); + } +} + + +/* + * M A P - m a i n e n t r y p o i n t + */ +/* + * uvm_map: establish a valid mapping in a map + * + * => assume startp is page aligned. + * => assume size is a multiple of PAGE_SIZE. + * => assume sys_mmap provides enough of a "hint" to have us skip + * over text/data/bss area. + * => map must be unlocked (we will lock it) + * => <uobj,uoffset> value meanings (4 cases): + * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER + * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER + * [3] <uobj,uoffset> == normal mapping + * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA + * + * case [4] is for kernel mappings where we don't know the offset until + * we've found a virtual address. note that kernel object offsets are + * always relative to vm_map_min(kernel_map). + * => XXXCDC: need way to map in external amap? + */ + +int +uvm_map(map, startp, size, uobj, uoffset, flags) + vm_map_t map; + vaddr_t *startp; /* IN/OUT */ + vsize_t size; + struct uvm_object *uobj; + vaddr_t uoffset; + uvm_flag_t flags; +{ + vm_map_entry_t prev_entry, new_entry; + vm_prot_t prot = UVM_PROTECTION(flags), maxprot = + UVM_MAXPROTECTION(flags); + vm_inherit_t inherit = UVM_INHERIT(flags); + int advice = UVM_ADVICE(flags); + UVMHIST_FUNC("uvm_map"); + UVMHIST_CALLED(maphist); + + UVMHIST_LOG(maphist, "(map=0x%x, *startp=0x%x, size=%d, flags=0x%x)", + map, *startp, size, flags); + UVMHIST_LOG(maphist, " uobj/offset 0x%x/%d", uobj, uoffset,0,0); + + /* + * step 0: sanity check of protection code + */ + + if ((prot & maxprot) != prot) { + UVMHIST_LOG(maphist, "<- prot. failure: prot=0x%x, max=0x%x", + prot, maxprot,0,0); + return(KERN_PROTECTION_FAILURE); + } + + /* + * step 1: figure out where to put new VM range + */ + + if (vm_map_lock_try(map) == FALSE) { + if (flags & UVM_FLAG_TRYLOCK) + return(KERN_FAILURE); + vm_map_lock(map); /* could sleep here */ + } + if ((prev_entry = uvm_map_findspace(map, *startp, size, startp, + uobj, uoffset, flags & UVM_FLAG_FIXED)) == NULL) { + UVMHIST_LOG(maphist,"<- uvm_map_findspace failed!",0,0,0,0); + vm_map_unlock(map); + return (KERN_NO_SPACE); + } + +#if defined(PMAP_GROWKERNEL) /* hack */ + { + /* locked by kernel_map lock */ + static vaddr_t maxkaddr = 0; + + /* + * hack: grow kernel PTPs in advance. + */ + if (map == kernel_map && maxkaddr < (*startp + size)) { + pmap_growkernel(*startp + size); + maxkaddr = *startp + size; + } + } +#endif + + UVMCNT_INCR(uvm_map_call); + + /* + * if uobj is null, then uoffset is either a VAC hint for PMAP_PREFER + * [typically from uvm_map_reserve] or it is UVM_UNKNOWN_OFFSET. in + * either case we want to zero it before storing it in the map entry + * (because it looks strange and confusing when debugging...) + * + * if uobj is not null + * if uoffset is not UVM_UNKNOWN_OFFSET then we have a normal mapping + * and we do not need to change uoffset. + * if uoffset is UVM_UNKNOWN_OFFSET then we need to find the offset + * now (based on the starting address of the map). this case is + * for kernel object mappings where we don't know the offset until + * the virtual address is found (with uvm_map_findspace). the + * offset is the distance we are from the start of the map. 
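[Illustrative annotation, not part of the imported source.] As a concrete example of the kernel-object convention with hypothetical addresses: if vm_map_min(kernel_map) is 0xf0000000 and uvm_map_findspace() settles on *startp == 0xf0123000, then uoffset becomes 0xf0123000 - 0xf0000000 = 0x123000, i.e. the page at kernel-object offset 0x123000 backs that virtual address. The same relation is applied in reverse by uvm_km_kmemalloc() and by the uvm_unmap_remove()/uvm_km_pgremove() path elsewhere in this import.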
+ */ + + if (uobj == NULL) { + uoffset = 0; + } else { + if (uoffset == UVM_UNKNOWN_OFFSET) { +#ifdef DIAGNOSTIC + if (uobj->uo_refs != UVM_OBJ_KERN) + panic("uvm_map: unknown offset with non-kernel object"); +#endif + uoffset = *startp - vm_map_min(kernel_map); + } + } + + /* + * step 2: try and insert in map by extending previous entry, if + * possible + * XXX: we don't try and pull back the next entry. might be useful + * for a stack, but we are currently allocating our stack in advance. + */ + + if ((flags & UVM_FLAG_NOMERGE) == 0 && + prev_entry->end == *startp && prev_entry != &map->header && + prev_entry->object.uvm_obj == uobj) { + + if (uobj && prev_entry->offset + + (prev_entry->end - prev_entry->start) != uoffset) + goto step3; + + if (UVM_ET_ISSUBMAP(prev_entry)) + goto step3; + + if (prev_entry->protection != prot || + prev_entry->max_protection != maxprot) + goto step3; + + if (prev_entry->inheritance != inherit || + prev_entry->advice != advice) + goto step3; + + /* wired_count's must match (new area is unwired) */ + if (prev_entry->wired_count) + goto step3; + + /* + * can't extend a shared amap. note: no need to lock amap to + * look at refs since we don't care about its exact value. + * if it is one (i.e. we have only reference) it will stay there + */ + + if (prev_entry->aref.ar_amap && + amap_refs(prev_entry->aref.ar_amap) != 1) { + goto step3; + } + + /* got it! */ + + UVMCNT_INCR(map_backmerge); + UVMHIST_LOG(maphist," starting back merge", 0, 0, 0, 0); + + /* + * drop our reference to uobj since we are extending a reference + * that we already have (the ref count can not drop to zero). + */ + if (uobj && uobj->pgops->pgo_detach) + uobj->pgops->pgo_detach(uobj); + + if (prev_entry->aref.ar_amap) { + amap_extend(prev_entry, size); + } + + prev_entry->end += size; + map->size += size; + + UVMHIST_LOG(maphist,"<- done (via backmerge)!", 0, 0, 0, 0); + vm_map_unlock(map); + return (KERN_SUCCESS); + + } +step3: + UVMHIST_LOG(maphist," allocating new map entry", 0, 0, 0, 0); + + /* + * check for possible forward merge (which we don't do) and count + * the number of times we missed a *possible* chance to merge more + */ + + if ((flags & UVM_FLAG_NOMERGE) == 0 && + prev_entry->next != &map->header && + prev_entry->next->start == (*startp + size)) + UVMCNT_INCR(map_forwmerge); + + /* + * step 3: allocate new entry and link it in + */ + + new_entry = uvm_mapent_alloc(map); + new_entry->start = *startp; + new_entry->end = new_entry->start + size; + new_entry->object.uvm_obj = uobj; + new_entry->offset = uoffset; + + if (uobj) + new_entry->etype = UVM_ET_OBJ; + else + new_entry->etype = 0; + + if (flags & UVM_FLAG_COPYONW) { + new_entry->etype |= UVM_ET_COPYONWRITE; + if ((flags & UVM_FLAG_OVERLAY) == 0) + new_entry->etype |= UVM_ET_NEEDSCOPY; + } + + new_entry->protection = prot; + new_entry->max_protection = maxprot; + new_entry->inheritance = inherit; + new_entry->wired_count = 0; + new_entry->advice = advice; + if (flags & UVM_FLAG_OVERLAY) { + /* + * to_add: for BSS we overallocate a little since we + * are likely to extend + */ + vaddr_t to_add = (flags & UVM_FLAG_AMAPPAD) ? 
+ UVM_AMAP_CHUNK << PAGE_SHIFT : 0; + struct vm_amap *amap = amap_alloc(size, to_add, M_WAITOK); + new_entry->aref.ar_pageoff = 0; + new_entry->aref.ar_amap = amap; + } else { + new_entry->aref.ar_amap = NULL; + } + + uvm_map_entry_link(map, prev_entry, new_entry); + + map->size += size; + + /* + * Update the free space hint + */ + + if ((map->first_free == prev_entry) && + (prev_entry->end >= new_entry->start)) + map->first_free = new_entry; + + UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0); + vm_map_unlock(map); + return(KERN_SUCCESS); +} + +/* + * uvm_map_lookup_entry: find map entry at or before an address + * + * => map must at least be read-locked by caller + * => entry is returned in "entry" + * => return value is true if address is in the returned entry + */ + +boolean_t +uvm_map_lookup_entry(map, address, entry) + vm_map_t map; + vaddr_t address; + vm_map_entry_t *entry; /* OUT */ +{ + vm_map_entry_t cur; + vm_map_entry_t last; + UVMHIST_FUNC("uvm_map_lookup_entry"); + UVMHIST_CALLED(maphist); + + UVMHIST_LOG(maphist,"(map=0x%x,addr=0x%x,ent=0x%x)", + map, address, entry, 0); + + /* + * start looking either from the head of the + * list, or from the hint. + */ + + simple_lock(&map->hint_lock); + cur = map->hint; + simple_unlock(&map->hint_lock); + + if (cur == &map->header) + cur = cur->next; + + UVMCNT_INCR(uvm_mlk_call); + if (address >= cur->start) { + /* + * go from hint to end of list. + * + * but first, make a quick check to see if + * we are already looking at the entry we + * want (which is usually the case). + * note also that we don't need to save the hint + * here... it is the same hint (unless we are + * at the header, in which case the hint didn't + * buy us anything anyway). + */ + last = &map->header; + if ((cur != last) && (cur->end > address)) { + UVMCNT_INCR(uvm_mlk_hint); + *entry = cur; + UVMHIST_LOG(maphist,"<- got it via hint (0x%x)", + cur, 0, 0, 0); + return (TRUE); + } + } else { + /* + * go from start to hint, *inclusively* + */ + last = cur->next; + cur = map->header.next; + } + + /* + * search linearly + */ + + while (cur != last) { + if (cur->end > address) { + if (address >= cur->start) { + /* + * save this lookup for future + * hints, and return + */ + + *entry = cur; + SAVE_HINT(map, cur); + UVMHIST_LOG(maphist,"<- search got it (0x%x)", + cur, 0, 0, 0); + return (TRUE); + } + break; + } + cur = cur->next; + } + *entry = cur->prev; + SAVE_HINT(map, *entry); + UVMHIST_LOG(maphist,"<- failed!",0,0,0,0); + return (FALSE); +} + + +/* + * uvm_map_findspace: find "length" sized space in "map". + * + * => "hint" is a hint about where we want it, unless fixed is true + * (in which case we insist on using "hint"). + * => "result" is VA returned + * => uobj/uoffset are to be used to handle VAC alignment, if required + * => caller must at least have read-locked map + * => returns NULL on failure, or pointer to prev. map entry if success + * => note this is a cross between the old vm_map_findspace and vm_map_find + */ + +vm_map_entry_t +uvm_map_findspace(map, hint, length, result, uobj, uoffset, fixed) + vm_map_t map; + vaddr_t hint; + vsize_t length; + vaddr_t *result; /* OUT */ + struct uvm_object *uobj; + vaddr_t uoffset; + boolean_t fixed; +{ + vm_map_entry_t entry, next, tmp; + vaddr_t end; + UVMHIST_FUNC("uvm_map_findspace"); + UVMHIST_CALLED(maphist); + + UVMHIST_LOG(maphist, "(map=0x%x, hint=0x%x, len=%d, fixed=%d)", + map, hint, length, fixed); + + if (hint < map->min_offset) { /* check ranges ... 
*/ + if (fixed) { + UVMHIST_LOG(maphist,"<- VA below map range",0,0,0,0); + return(NULL); + } + hint = map->min_offset; + } + if (hint > map->max_offset) { + UVMHIST_LOG(maphist,"<- VA 0x%x > range [0x%x->0x%x]", + hint, map->min_offset, map->max_offset, 0); + return(NULL); + } + + /* + * Look for the first possible address; if there's already + * something at this address, we have to start after it. + */ + + if (!fixed && hint == map->min_offset) { + if ((entry = map->first_free) != &map->header) + hint = entry->end; + } else { + if (uvm_map_lookup_entry(map, hint, &tmp)) { + /* "hint" address already in use ... */ + if (fixed) { + UVMHIST_LOG(maphist,"<- fixed & VA in use", + 0, 0, 0, 0); + return(NULL); + } + hint = tmp->end; + } + entry = tmp; + } + + /* + * Look through the rest of the map, trying to fit a new region in + * the gap between existing regions, or after the very last region. + * note: entry->end = base VA of current gap, + * next->start = VA of end of current gap + */ + for (;; hint = (entry = next)->end) { + /* + * Find the end of the proposed new region. Be sure we didn't + * go beyond the end of the map, or wrap around the address; + * if so, we lose. Otherwise, if this is the last entry, or + * if the proposed new region fits before the next entry, we + * win. + */ + +#ifdef PMAP_PREFER + /* + * push hint forward as needed to avoid VAC alias problems. + * we only do this if a valid offset is specified. + */ + if (!fixed && uoffset != UVM_UNKNOWN_OFFSET) + PMAP_PREFER(uoffset, &hint); +#endif + end = hint + length; + if (end > map->max_offset || end < hint) { + UVMHIST_LOG(maphist,"<- failed (off end)", 0,0,0,0); + return (NULL); + } + next = entry->next; + if (next == &map->header || next->start >= end) + break; + if (fixed) { + UVMHIST_LOG(maphist,"<- fixed mapping failed", 0,0,0,0); + return(NULL); /* only one shot at it ... */ + } + } + SAVE_HINT(map, entry); + *result = hint; + UVMHIST_LOG(maphist,"<- got it! (result=0x%x)", hint, 0,0,0); + return (entry); +} + +/* + * U N M A P - m a i n h e l p e r f u n c t i o n s + */ + +/* + * uvm_unmap_remove: remove mappings from a vm_map (from "start" up to "stop") + * + * => caller must check alignment and size + * => map must be locked by caller + * => we return a list of map entries that we've remove from the map + * in "entry_list" + */ + +int +uvm_unmap_remove(map, start, end, entry_list) + vm_map_t map; + vaddr_t start,end; + vm_map_entry_t *entry_list; /* OUT */ +{ + vm_map_entry_t entry, first_entry, next; + vaddr_t len; + UVMHIST_FUNC("uvm_unmap_remove"); + UVMHIST_CALLED(maphist); + + UVMHIST_LOG(maphist,"(map=0x%x, start=0x%x, end=0x%x)", + map, start, end, 0); + + VM_MAP_RANGE_CHECK(map, start, end); + + /* + * find first entry + */ + if (uvm_map_lookup_entry(map, start, &first_entry) == TRUE) { + /* clip and go... */ + entry = first_entry; + UVM_MAP_CLIP_START(map, entry, start); + /* critical! prevents stale hint */ + SAVE_HINT(map, entry->prev); + + } else { + entry = first_entry->next; + } + + /* + * Save the free space hint + */ + + if (map->first_free->start >= start) + map->first_free = entry->prev; + + /* + * note: we now re-use first_entry for a different task. we remove + * a number of map entries from the map and save them in a linked + * list headed by "first_entry". once we remove them from the map + * the caller should unlock the map and drop the references to the + * backing objects [c.f. uvm_unmap_detach]. the object is to + * seperate unmapping from reference dropping. why? 
+ * [1] the map has to be locked for unmapping + * [2] the map need not be locked for reference dropping + * [3] dropping references may trigger pager I/O, and if we hit + * a pager that does synchronous I/O we may have to wait for it. + * [4] we would like all waiting for I/O to occur with maps unlocked + * so that we don't block other threads. + */ + first_entry = NULL; + *entry_list = NULL; /* to be safe */ + + /* + * break up the area into map entry sized regions and unmap. note + * that all mappings have to be removed before we can even consider + * dropping references to amaps or VM objects (otherwise we could end + * up with a mapping to a page on the free list which would be very bad) + */ + + while ((entry != &map->header) && (entry->start < end)) { + + UVM_MAP_CLIP_END(map, entry, end); + next = entry->next; + len = entry->end - entry->start; + + /* + * unwire before removing addresses from the pmap; otherwise + * unwiring will put the entries back into the pmap (XXX). + */ + + if (entry->wired_count) + uvm_map_entry_unwire(map, entry); + + /* + * special case: handle mappings to anonymous kernel objects. + * we want to free these pages right away... + */ + if (UVM_ET_ISOBJ(entry) && + entry->object.uvm_obj->uo_refs == UVM_OBJ_KERN) { + +#ifdef DIAGNOSTIC + if (vm_map_pmap(map) != pmap_kernel()) + panic("uvm_unmap_remove: kernel object mapped by non-kernel map"); +#endif + + /* + * note: kernel object mappings are currently used in + * two ways: + * [1] "normal" mappings of pages in the kernel object + * [2] uvm_km_valloc'd allocations in which we + * pmap_enter in some non-kernel-object page + * (e.g. vmapbuf). + * + * for case [1], we need to remove the mapping from + * the pmap and then remove the page from the kernel + * object (because, once pages in a kernel object are + * unmapped they are no longer needed, unlike, say, + * a vnode where you might want the data to persist + * until flushed out of a queue). + * + * for case [2], we need to remove the mapping from + * the pmap. there shouldn't be any pages at the + * specified offset in the kernel object [but it + * doesn't hurt to call uvm_km_pgremove just to be + * safe?] + * + * uvm_km_pgremove currently does the following: + * for pages in the kernel object in range: + * - pmap_page_protect them out of all pmaps + * - uvm_pagefree the page + * + * note that in case [1] the pmap_page_protect call + * in uvm_km_pgremove may very well be redundant + * because we have already removed the mappings + * beforehand with pmap_remove (or pmap_kremove). + * in the PMAP_NEW case, the pmap_page_protect call + * may not do anything, since PMAP_NEW allows the + * kernel to enter/remove kernel mappings without + * bothing to keep track of the mappings (e.g. via + * pv_entry lists). XXX: because of this, in the + * future we should consider removing the + * pmap_page_protect from uvm_km_pgremove some time + * in the future. + */ + + /* + * remove mappings from pmap + */ +#if defined(PMAP_NEW) + pmap_kremove(entry->start, len); +#else + pmap_remove(pmap_kernel(), entry->start, + entry->start+len); +#endif + + /* + * remove pages from a kernel object (offsets are + * always relative to vm_map_min(kernel_map)). 
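[Illustrative annotation, not part of the imported source.] Case [2] above is the vmapbuf-style pattern: only kernel VA comes from the kernel object, and the pages entered there belong to some other object, so uvm_km_pgremove() should find nothing at those offsets. A hedged sketch of that pattern, where "len" and "foreign_pa" are hypothetical:

	vaddr_t kva;

	kva = uvm_km_valloc(kernel_map, len);		/* VA only, backed by kernel_object */
	pmap_enter(pmap_kernel(), kva, foreign_pa, VM_PROT_ALL, TRUE);
	/* ... I/O through kva ... */
	uvm_unmap(kernel_map, kva, kva + len);		/* pmap removal + (empty) pgremove */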
+ */ + uvm_km_pgremove(entry->object.uvm_obj, + entry->start - vm_map_min(kernel_map), + entry->end - vm_map_min(kernel_map)); + + /* + * null out kernel_object reference, we've just + * dropped it + */ + entry->etype &= ~UVM_ET_OBJ; + entry->object.uvm_obj = NULL; /* to be safe */ + + } else { + /* + * remove mappings the standard way. + */ + pmap_remove(map->pmap, entry->start, entry->end); + } + + /* + * remove entry from map and put it on our list of entries + * that we've nuked. then go do next entry. + */ + UVMHIST_LOG(maphist, " removed map entry 0x%x", entry, 0, 0,0); + uvm_map_entry_unlink(map, entry); + map->size -= len; + entry->next = first_entry; + first_entry = entry; + entry = next; /* next entry, please */ + } + + /* + * now we've cleaned up the map and are ready for the caller to drop + * references to the mapped objects. + */ + + *entry_list = first_entry; + UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0); + return(KERN_SUCCESS); +} + +/* + * uvm_unmap_detach: drop references in a chain of map entries + * + * => we will free the map entries as we traverse the list. + */ + +void +uvm_unmap_detach(first_entry, amap_unref_flags) + vm_map_entry_t first_entry; + int amap_unref_flags; +{ + vm_map_entry_t next_entry; + UVMHIST_FUNC("uvm_unmap_detach"); UVMHIST_CALLED(maphist); + + while (first_entry) { + +#ifdef DIAGNOSTIC + /* + * sanity check + */ + /* was part of vm_map_entry_delete() */ + if (first_entry->wired_count) + panic("unmap: still wired!"); +#endif + + UVMHIST_LOG(maphist, + " detach 0x%x: amap=0x%x, obj=0x%x, submap?=%d", + first_entry, first_entry->aref.ar_amap, + first_entry->object.uvm_obj, + UVM_ET_ISSUBMAP(first_entry)); + + /* + * drop reference to amap, if we've got one + */ + + if (first_entry->aref.ar_amap) + amap_unref(first_entry, amap_unref_flags); + + /* + * drop reference to our backing object, if we've got one + */ + + if (UVM_ET_ISSUBMAP(first_entry)) { + /* ... unlikely to happen, but play it safe */ + uvm_map_deallocate(first_entry->object.sub_map); + } else { + if (UVM_ET_ISOBJ(first_entry) && + first_entry->object.uvm_obj->pgops->pgo_detach) + first_entry->object.uvm_obj->pgops-> + pgo_detach(first_entry->object.uvm_obj); + } + + /* + * next entry + */ + next_entry = first_entry->next; + uvm_mapent_free(first_entry); + first_entry = next_entry; + } + + /* + * done! + */ + UVMHIST_LOG(maphist, "<- done", 0,0,0,0); + return; +} + +/* + * E X T R A C T I O N F U N C T I O N S + */ + +/* + * uvm_map_reserve: reserve space in a vm_map for future use. + * + * => we reserve space in a map by putting a dummy map entry in the + * map (dummy means obj=NULL, amap=NULL, prot=VM_PROT_NONE) + * => map should be unlocked (we will write lock it) + * => we return true if we were able to reserve space + * => XXXCDC: should be inline? + */ + +int +uvm_map_reserve(map, size, offset, raddr) + vm_map_t map; + vsize_t size; + vaddr_t offset; /* hint for pmap_prefer */ + vaddr_t *raddr; /* OUT: reserved VA */ +{ + UVMHIST_FUNC("uvm_map_reserve"); UVMHIST_CALLED(maphist); + + UVMHIST_LOG(maphist, "(map=0x%x, size=0x%x, offset=0x%x,addr=0x%x)", + map,size,offset,raddr); + + size = round_page(size); + if (*raddr < vm_map_min(map)) + *raddr = vm_map_min(map); /* hint */ + + /* + * reserve some virtual space. 
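The unmap path above is split in two on purpose: uvm_unmap_remove unlinks entries while the map is locked, and uvm_unmap_detach drops the amap and object references only after the caller has unlocked, so any pager I/O runs without the map lock held. A minimal userland sketch of that collect-then-detach shape, with hypothetical names and a pthread mutex standing in for the map lock (not the UVM code itself):

#include <pthread.h>
#include <stdlib.h>

struct ref_entry {
    struct ref_entry *next;
    int *refcnt;                     /* reference to drop later */
};

pthread_mutex_t map_lock = PTHREAD_MUTEX_INITIALIZER;
struct ref_entry *map_head;          /* "mapped" entries, lock protected */

/* phase 1: unlink everything while the lock is held (cheap, no I/O) */
struct ref_entry *
collect_dead_entries(void)
{
    struct ref_entry *dead;

    pthread_mutex_lock(&map_lock);
    dead = map_head;                 /* whole list becomes the dead list */
    map_head = NULL;
    pthread_mutex_unlock(&map_lock);
    return dead;
}

/* phase 2: drop references with the lock released (may block) */
void
detach_dead_entries(struct ref_entry *dead)
{
    struct ref_entry *next;

    while (dead != NULL) {
        next = dead->next;
        (*dead->refcnt)--;           /* in UVM this step can start pager I/O */
        free(dead);
        dead = next;
    }
}

Nothing that can sleep or start I/O runs while the lock is held; handing the dead list back to the caller is what makes that split possible.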
+ */ + + if (uvm_map(map, raddr, size, NULL, offset, + UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE, + UVM_ADV_RANDOM, UVM_FLAG_NOMERGE)) != KERN_SUCCESS) { + UVMHIST_LOG(maphist, "<- done (no VM)", 0,0,0,0); + return (FALSE); + } + + UVMHIST_LOG(maphist, "<- done (*raddr=0x%x)", *raddr,0,0,0); + return (TRUE); +} + +/* + * uvm_map_replace: replace a reserved (blank) area of memory with + * real mappings. + * + * => caller must WRITE-LOCK the map + * => we return TRUE if replacement was a success + * => we expect the newents chain to have nnewents entrys on it and + * we expect newents->prev to point to the last entry on the list + * => note newents is allowed to be NULL + */ + +int +uvm_map_replace(map, start, end, newents, nnewents) + struct vm_map *map; + vaddr_t start, end; + vm_map_entry_t newents; + int nnewents; +{ + vm_map_entry_t oldent, last; + UVMHIST_FUNC("uvm_map_replace"); + UVMHIST_CALLED(maphist); + + /* + * first find the blank map entry at the specified address + */ + + if (!uvm_map_lookup_entry(map, start, &oldent)) { + return(FALSE); + } + + /* + * check to make sure we have a proper blank entry + */ + + if (oldent->start != start || oldent->end != end || + oldent->object.uvm_obj != NULL || oldent->aref.ar_amap != NULL) { + return (FALSE); + } + +#ifdef DIAGNOSTIC + /* + * sanity check the newents chain + */ + { + vm_map_entry_t tmpent = newents; + int nent = 0; + vaddr_t cur = start; + + while (tmpent) { + nent++; + if (tmpent->start < cur) + panic("uvm_map_replace1"); + if (tmpent->start > tmpent->end || tmpent->end > end) { + printf("tmpent->start=0x%lx, tmpent->end=0x%lx, end=0x%lx\n", + tmpent->start, tmpent->end, end); + panic("uvm_map_replace2"); + } + cur = tmpent->end; + if (tmpent->next) { + if (tmpent->next->prev != tmpent) + panic("uvm_map_replace3"); + } else { + if (newents->prev != tmpent) + panic("uvm_map_replace4"); + } + tmpent = tmpent->next; + } + if (nent != nnewents) + panic("uvm_map_replace5"); + } +#endif + + /* + * map entry is a valid blank! replace it. (this does all the + * work of map entry link/unlink...). + */ + + if (newents) { + + last = newents->prev; /* we expect this */ + + /* critical: flush stale hints out of map */ + SAVE_HINT(map, newents); + if (map->first_free == oldent) + map->first_free = last; + + last->next = oldent->next; + last->next->prev = last; + newents->prev = oldent->prev; + newents->prev->next = newents; + map->nentries = map->nentries + (nnewents - 1); + + } else { + + /* critical: flush stale hints out of map */ + SAVE_HINT(map, oldent->prev); + if (map->first_free == oldent) + map->first_free = oldent->prev; + + /* NULL list of new entries: just remove the old one */ + uvm_map_entry_unlink(map, oldent); + } + + + /* + * now we can free the old blank entry, unlock the map and return. 
+ */ + + uvm_mapent_free(oldent); + return(TRUE); +} + +/* + * uvm_map_extract: extract a mapping from a map and put it somewhere + * (maybe removing the old mapping) + * + * => maps should be unlocked (we will write lock them) + * => returns 0 on success, error code otherwise + * => start must be page aligned + * => len must be page sized + * => flags: + * UVM_EXTRACT_REMOVE: remove mappings from srcmap + * UVM_EXTRACT_CONTIG: abort if unmapped area (advisory only) + * UVM_EXTRACT_QREF: for a temporary extraction do quick obj refs + * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go + * >>>NOTE: if you set REMOVE, you are not allowed to use CONTIG or QREF!<<< + * >>>NOTE: QREF's must be unmapped via the QREF path, thus should only + * be used from within the kernel in a kernel level map <<< + */ + +int +uvm_map_extract(srcmap, start, len, dstmap, dstaddrp, flags) + vm_map_t srcmap, dstmap; + vaddr_t start, *dstaddrp; + vsize_t len; + int flags; +{ + vaddr_t dstaddr, end, newend, oldoffset, fudge, orig_fudge, + oldstart; + vm_map_entry_t chain, endchain, entry, orig_entry, newentry, deadentry; + vm_map_entry_t oldentry; + vsize_t elen; + int nchain, error, copy_ok; + UVMHIST_FUNC("uvm_map_extract"); UVMHIST_CALLED(maphist); + UVMHIST_LOG(maphist,"(srcmap=0x%x,start=0x%x, len=0x%x", srcmap, start, + len,0); + UVMHIST_LOG(maphist," ...,dstmap=0x%x, flags=0x%x)", dstmap,flags,0,0); + +#ifdef DIAGNOSTIC + /* + * step 0: sanity check: start must be on a page boundary, length + * must be page sized. can't ask for CONTIG/QREF if you asked for + * REMOVE. + */ + if ((start & PAGE_MASK) || (len & PAGE_MASK)) + panic("uvm_map_extract1"); + if (flags & UVM_EXTRACT_REMOVE) + if (flags & (UVM_EXTRACT_CONTIG|UVM_EXTRACT_QREF)) + panic("uvm_map_extract2"); +#endif + + + /* + * step 1: reserve space in the target map for the extracted area + */ + + dstaddr = *dstaddrp; + if (uvm_map_reserve(dstmap, len, start, &dstaddr) == FALSE) + return(ENOMEM); + *dstaddrp = dstaddr; /* pass address back to caller */ + UVMHIST_LOG(maphist, " dstaddr=0x%x", dstaddr,0,0,0); + + + /* + * step 2: setup for the extraction process loop by init'ing the + * map entry chain, locking src map, and looking up the first useful + * entry in the map. + */ + + end = start + len; + newend = dstaddr + len; + chain = endchain = NULL; + nchain = 0; + vm_map_lock(srcmap); + + if (uvm_map_lookup_entry(srcmap, start, &entry)) { + + /* "start" is within an entry */ + if (flags & UVM_EXTRACT_QREF) { + /* + * for quick references we don't clip the entry, so + * the entry may map space "before" the starting + * virtual address... this is the "fudge" factor + * (which can be non-zero only the first time + * through the "while" loop in step 3). + */ + fudge = start - entry->start; + } else { + /* + * normal reference: we clip the map to fit (thus + * fudge is zero) + */ + UVM_MAP_CLIP_START(srcmap, entry, start); + SAVE_HINT(srcmap, entry->prev); + fudge = 0; + } + + } else { + + /* "start" is not within an entry ... skip to next entry */ + if (flags & UVM_EXTRACT_CONTIG) { + error = EINVAL; + goto bad; /* definite hole here ... */ + } + + entry = entry->next; + fudge = 0; + } + /* save values from srcmap for step 6 */ + orig_entry = entry; + orig_fudge = fudge; + + + /* + * step 3: now start looping through the map entries, extracting + * as we go. 
+ */ + + while (entry->start < end && entry != &srcmap->header) { + + /* if we are not doing a quick reference, clip it */ + if ((flags & UVM_EXTRACT_QREF) == 0) + UVM_MAP_CLIP_END(srcmap, entry, end); + + /* clear needs_copy (allow chunking) */ + if (UVM_ET_ISNEEDSCOPY(entry)) { + if (fudge) + oldstart = entry->start; + else + oldstart = 0; /* XXX: gcc */ + amap_copy(srcmap, entry, M_NOWAIT, TRUE, start, end); + if (UVM_ET_ISNEEDSCOPY(entry)) { /* failed? */ + error = ENOMEM; + goto bad; + } + /* amap_copy could clip (during chunk)! update fudge */ + if (fudge) { + fudge = fudge - (entry->start - oldstart); + orig_fudge = fudge; + } + } + + /* calculate the offset of this from "start" */ + oldoffset = (entry->start + fudge) - start; + + /* allocate a new map entry */ + newentry = uvm_mapent_alloc(dstmap); + if (newentry == NULL) { + error = ENOMEM; + goto bad; + } + + /* set up new map entry */ + newentry->next = NULL; + newentry->prev = endchain; + newentry->start = dstaddr + oldoffset; + newentry->end = + newentry->start + (entry->end - (entry->start + fudge)); + if (newentry->end > newend) + newentry->end = newend; + newentry->object.uvm_obj = entry->object.uvm_obj; + if (newentry->object.uvm_obj) { + if (newentry->object.uvm_obj->pgops->pgo_reference) + newentry->object.uvm_obj->pgops-> + pgo_reference(newentry->object.uvm_obj); + newentry->offset = entry->offset + fudge; + } else { + newentry->offset = 0; + } + newentry->etype = entry->etype; + newentry->protection = (flags & UVM_EXTRACT_FIXPROT) ? + entry->max_protection : entry->protection; + newentry->max_protection = entry->max_protection; + newentry->inheritance = entry->inheritance; + newentry->wired_count = 0; + newentry->aref.ar_amap = entry->aref.ar_amap; + if (newentry->aref.ar_amap) { + newentry->aref.ar_pageoff = + entry->aref.ar_pageoff + (fudge >> PAGE_SHIFT); + amap_ref(newentry, AMAP_SHARED | + ((flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0)); + } else { + newentry->aref.ar_pageoff = 0; + } + newentry->advice = entry->advice; + + /* now link it on the chain */ + nchain++; + if (endchain == NULL) { + chain = endchain = newentry; + } else { + endchain->next = newentry; + endchain = newentry; + } + + /* end of 'while' loop! */ + if ((flags & UVM_EXTRACT_CONTIG) && entry->end < end && + (entry->next == &srcmap->header || + entry->next->start != entry->end)) { + error = EINVAL; + goto bad; + } + entry = entry->next; + fudge = 0; + } + + + /* + * step 4: close off chain (in format expected by uvm_map_replace) + */ + + if (chain) + chain->prev = endchain; + + + /* + * step 5: attempt to lock the dest map so we can pmap_copy. 
+ * note usage of copy_ok: + * 1 => dstmap locked, pmap_copy ok, and we "replace" here (step 5) + * 0 => dstmap unlocked, NO pmap_copy, and we will "replace" in step 7 + */ + + if (srcmap == dstmap || vm_map_lock_try(dstmap) == TRUE) { + + copy_ok = 1; + if (!uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain, + nchain)) { + if (srcmap != dstmap) + vm_map_unlock(dstmap); + error = EIO; + goto bad; + } + + } else { + + copy_ok = 0; + /* replace defered until step 7 */ + + } + + + /* + * step 6: traverse the srcmap a second time to do the following: + * - if we got a lock on the dstmap do pmap_copy + * - if UVM_EXTRACT_REMOVE remove the entries + * we make use of orig_entry and orig_fudge (saved in step 2) + */ + + if (copy_ok || (flags & UVM_EXTRACT_REMOVE)) { + + /* purge possible stale hints from srcmap */ + if (flags & UVM_EXTRACT_REMOVE) { + SAVE_HINT(srcmap, orig_entry->prev); + if (srcmap->first_free->start >= start) + srcmap->first_free = orig_entry->prev; + } + + entry = orig_entry; + fudge = orig_fudge; + deadentry = NULL; /* for UVM_EXTRACT_REMOVE */ + + while (entry->start < end && entry != &srcmap->header) { + + if (copy_ok) { + oldoffset = (entry->start + fudge) - start; + elen = min(end, entry->end) - (entry->start + fudge); + pmap_copy(dstmap->pmap, srcmap->pmap, dstaddr + oldoffset, + elen, entry->start + fudge); + } + + /* we advance "entry" in the following if statement */ + if (flags & UVM_EXTRACT_REMOVE) { + pmap_remove(srcmap->pmap, entry->start, + entry->end); + oldentry = entry; /* save entry */ + entry = entry->next; /* advance */ + uvm_map_entry_unlink(srcmap, oldentry); + /* add to dead list */ + oldentry->next = deadentry; + deadentry = oldentry; + } else { + entry = entry->next; /* advance */ + } + + /* end of 'while' loop */ + fudge = 0; + } + + /* + * unlock dstmap. we will dispose of deadentry in + * step 7 if needed + */ + if (copy_ok && srcmap != dstmap) + vm_map_unlock(dstmap); + + } + else + deadentry = NULL; /* XXX: gcc */ + + /* + * step 7: we are done with the source map, unlock. if copy_ok + * is 0 then we have not replaced the dummy mapping in dstmap yet + * and we need to do so now. + */ + + vm_map_unlock(srcmap); + if ((flags & UVM_EXTRACT_REMOVE) && deadentry) + uvm_unmap_detach(deadentry, 0); /* dispose of old entries */ + + /* now do the replacement if we didn't do it in step 5 */ + if (copy_ok == 0) { + vm_map_lock(dstmap); + error = uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain, + nchain); + vm_map_unlock(dstmap); + + if (error == FALSE) { + error = EIO; + goto bad2; + } + } + + /* + * done! + */ + return(0); + + /* + * bad: failure recovery + */ +bad: + vm_map_unlock(srcmap); +bad2: /* src already unlocked */ + if (chain) + uvm_unmap_detach(chain, + (flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0); + uvm_unmap(dstmap, dstaddr, dstaddr+len); /* ??? */ + return(error); +} + +/* end of extraction functions */ + +/* + * uvm_map_submap: punch down part of a map into a submap + * + * => only the kernel_map is allowed to be submapped + * => the purpose of submapping is to break up the locking granularity + * of a larger map + * => the range specified must have been mapped previously with a uvm_map() + * call [with uobj==NULL] to create a blank map entry in the main map. + * [And it had better still be blank!] + * => maps which contain submaps should never be copied or forked. + * => to remove a submap, use uvm_unmap() on the main map + * and then uvm_map_deallocate() the submap. + * => main map must be unlocked. 
+ * => submap must have been init'd and have a zero reference count. + * [need not be locked as we don't actually reference it] + */ + +int +uvm_map_submap(map, start, end, submap) + vm_map_t map, submap; + vaddr_t start, end; +{ + vm_map_entry_t entry; + int result; + UVMHIST_FUNC("uvm_map_submap"); UVMHIST_CALLED(maphist); + + vm_map_lock(map); + + VM_MAP_RANGE_CHECK(map, start, end); + + if (uvm_map_lookup_entry(map, start, &entry)) { + UVM_MAP_CLIP_START(map, entry, start); + UVM_MAP_CLIP_END(map, entry, end); /* to be safe */ + } + else { + entry = NULL; + } + + if (entry != NULL && + entry->start == start && entry->end == end && + entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL && + !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) { + + /* + * doit! + */ + entry->etype |= UVM_ET_SUBMAP; + entry->object.sub_map = submap; + entry->offset = 0; + uvm_map_reference(submap); + result = KERN_SUCCESS; + } else { + result = KERN_INVALID_ARGUMENT; + } + vm_map_unlock(map); + + return(result); +} + + +/* + * uvm_map_protect: change map protection + * + * => set_max means set max_protection. + * => map must be unlocked. + * => XXXCDC: does not work properly with share maps. rethink. + */ + +#define MASK(entry) ( UVM_ET_ISCOPYONWRITE(entry) ? \ + ~VM_PROT_WRITE : VM_PROT_ALL) +#define max(a,b) ((a) > (b) ? (a) : (b)) + +int +uvm_map_protect(map, start, end, new_prot, set_max) + vm_map_t map; + vaddr_t start, end; + vm_prot_t new_prot; + boolean_t set_max; +{ + vm_map_entry_t current, entry; + UVMHIST_FUNC("uvm_map_protect"); UVMHIST_CALLED(maphist); + UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_prot=0x%x)", + map, start, end, new_prot); + + vm_map_lock(map); + + VM_MAP_RANGE_CHECK(map, start, end); + + if (uvm_map_lookup_entry(map, start, &entry)) { + UVM_MAP_CLIP_START(map, entry, start); + } else { + entry = entry->next; + } + + /* + * make a first pass to check for protection violations. + */ + + current = entry; + while ((current != &map->header) && (current->start < end)) { + if (UVM_ET_ISSUBMAP(current)) + return(KERN_INVALID_ARGUMENT); + if ((new_prot & current->max_protection) != new_prot) { + vm_map_unlock(map); + return(KERN_PROTECTION_FAILURE); + } + current = current->next; + } + + /* go back and fix up protections (no need to clip this time). */ + + current = entry; + + while ((current != &map->header) && (current->start < end)) { + vm_prot_t old_prot; + + UVM_MAP_CLIP_END(map, current, end); + + old_prot = current->protection; + if (set_max) + current->protection = + (current->max_protection = new_prot) & old_prot; + else + current->protection = new_prot; + + /* + * update physical map if necessary. worry about copy-on-write + * here -- CHECK THIS XXX + */ + + if (current->protection != old_prot) { + + /* update pmap! */ + pmap_protect(map->pmap, current->start, current->end, + current->protection & MASK(entry)); + + } + current = current->next; + } + + vm_map_unlock(map); + UVMHIST_LOG(maphist, "<- done",0,0,0,0); + return(KERN_SUCCESS); +} + +#undef max +#undef MASK + +/* + * uvm_map_inherit: set inheritance code for range of addrs in map. + * + * => map must be unlocked + * => note that the inherit code is used during a "fork". see fork + * code for details. + * => XXXCDC: currently only works in main map. what about share map? 
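uvm_map_protect above never hands write permission to the pmap for a copy-on-write entry: the protection actually installed is masked with ~VM_PROT_WRITE, so the first store still faults and the fault handler gets a chance to copy. A tiny standalone illustration of that masking, using made-up protection constants rather than the kernel's:

#define PROT_R   0x1
#define PROT_W   0x2
#define PROT_X   0x4
#define PROT_ALL (PROT_R | PROT_W | PROT_X)

/*
 * Protection to install in the MMU layer: a copy-on-write mapping must
 * not become writable yet, even if the map-level protection allows it.
 */
unsigned
hw_protection(unsigned map_prot, int copy_on_write)
{
    unsigned mask = copy_on_write ? (unsigned)~PROT_W : PROT_ALL;

    return map_prot & mask;
}

So a copy-on-write region whose map protection is read/write is still entered read-only until a write fault resolves the copy.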
+ */ + +int +uvm_map_inherit(map, start, end, new_inheritance) + vm_map_t map; + vaddr_t start; + vaddr_t end; + vm_inherit_t new_inheritance; +{ + vm_map_entry_t entry, temp_entry; + UVMHIST_FUNC("uvm_map_inherit"); UVMHIST_CALLED(maphist); + UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_inh=0x%x)", + map, start, end, new_inheritance); + + switch (new_inheritance) { + case VM_INHERIT_NONE: + case VM_INHERIT_COPY: + case VM_INHERIT_SHARE: + break; + default: + UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); + return(KERN_INVALID_ARGUMENT); + } + + vm_map_lock(map); + + VM_MAP_RANGE_CHECK(map, start, end); + + if (uvm_map_lookup_entry(map, start, &temp_entry)) { + entry = temp_entry; + UVM_MAP_CLIP_START(map, entry, start); + } else { + entry = temp_entry->next; + } + + while ((entry != &map->header) && (entry->start < end)) { + UVM_MAP_CLIP_END(map, entry, end); + + entry->inheritance = new_inheritance; + + entry = entry->next; + } + + vm_map_unlock(map); + UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0); + return(KERN_SUCCESS); +} + +/* + * uvm_map_pageable: sets the pageability of a range in a map. + * + * => regions sepcified as not pageable require lock-down (wired) memory + * and page tables. + * => map must not be locked. + * => XXXCDC: check this and try and clean it up. + */ + +int +uvm_map_pageable(map, start, end, new_pageable) + vm_map_t map; + vaddr_t start, end; + boolean_t new_pageable; +{ + vm_map_entry_t entry, start_entry; + vaddr_t failed = 0; + int rv; + UVMHIST_FUNC("uvm_map_pageable"); UVMHIST_CALLED(maphist); + UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_pageable=0x%x)", + map, start, end, new_pageable); + + vm_map_lock(map); + VM_MAP_RANGE_CHECK(map, start, end); + + /* + * only one pageability change may take place at one time, since + * uvm_fault_wire assumes it will be called only once for each + * wiring/unwiring. therefore, we have to make sure we're actually + * changing the pageability for the entire region. we do so before + * making any changes. + */ + + if (uvm_map_lookup_entry(map, start, &start_entry) == FALSE) { + vm_map_unlock(map); + + UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); + return (KERN_INVALID_ADDRESS); + } + entry = start_entry; + + /* + * handle wiring and unwiring seperately. + */ + + if (new_pageable) { /* unwire */ + + UVM_MAP_CLIP_START(map, entry, start); + + /* + * unwiring. first ensure that the range to be unwired is + * really wired down and that there are no holes. + */ + while ((entry != &map->header) && (entry->start < end)) { + + if (entry->wired_count == 0 || + (entry->end < end && + (entry->next == &map->header || + entry->next->start > entry->end))) { + vm_map_unlock(map); + UVMHIST_LOG(maphist, + "<- done (INVALID UNWIRE ARG)",0,0,0,0); + return (KERN_INVALID_ARGUMENT); + } + entry = entry->next; + } + + /* + * now decrement the wiring count for each region. if a region + * becomes completely unwired, unwire its physical pages and + * mappings. 
+ */ +#if 0 /* not necessary: uvm_fault_unwire does not lock */ + lock_set_recursive(&map->lock); +#endif /* XXXCDC */ + + entry = start_entry; + while ((entry != &map->header) && (entry->start < end)) { + UVM_MAP_CLIP_END(map, entry, end); + + entry->wired_count--; + if (entry->wired_count == 0) + uvm_map_entry_unwire(map, entry); + + entry = entry->next; + } +#if 0 /* XXXCDC: not necessary, see above */ + lock_clear_recursive(&map->lock); +#endif + vm_map_unlock(map); + UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0); + return(KERN_SUCCESS); + + /* + * end of unwire case! + */ + } + + /* + * wire case: in two passes [XXXCDC: ugly block of code here] + * + * 1: holding the write lock, we create any anonymous maps that need + * to be created. then we clip each map entry to the region to + * be wired and increment its wiring count. + * + * 2: we downgrade to a read lock, and call uvm_fault_wire to fault + * in the pages for any newly wired area (wired_count is 1). + * + * downgrading to a read lock for uvm_fault_wire avoids a possible + * deadlock with another thread that may have faulted on one of + * the pages to be wired (it would mark the page busy, blocking + * us, then in turn block on the map lock that we hold). because + * of problems in the recursive lock package, we cannot upgrade + * to a write lock in vm_map_lookup. thus, any actions that + * require the write lock must be done beforehand. because we + * keep the read lock on the map, the copy-on-write status of the + * entries we modify here cannot change. + */ + + while ((entry != &map->header) && (entry->start < end)) { + + if (entry->wired_count == 0) { /* not already wired? */ + + /* + * perform actions of vm_map_lookup that need the + * write lock on the map: create an anonymous map + * for a copy-on-write region, or an anonymous map + * for a zero-fill region. (XXXCDC: submap case + * ok?) + */ + + if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */ + /* + * XXXCDC: protection vs. max_protection?? + * (wirefault uses max?) + * XXXCDC: used to do it always if + * uvm_obj == NULL (wrong?) + */ + if ( UVM_ET_ISNEEDSCOPY(entry) && + (entry->protection & VM_PROT_WRITE) != 0) { + amap_copy(map, entry, M_WAITOK, TRUE, + start, end); + /* XXXCDC: wait OK? */ + } + } + } /* wired_count == 0 */ + UVM_MAP_CLIP_START(map, entry, start); + UVM_MAP_CLIP_END(map, entry, end); + entry->wired_count++; + + /* + * Check for holes + */ + if (entry->end < end && (entry->next == &map->header || + entry->next->start > entry->end)) { + /* + * found one. amap creation actions do not need to + * be undone, but the wired counts need to be restored. + */ + while (entry != &map->header && entry->end > start) { + entry->wired_count--; + entry = entry->prev; + } + vm_map_unlock(map); + UVMHIST_LOG(maphist,"<- done (INVALID WIRE)",0,0,0,0); + return(KERN_INVALID_ARGUMENT); + } + entry = entry->next; + } + + /* + * Pass 2. + */ + /* + * HACK HACK HACK HACK + * + * if we are wiring in the kernel map or a submap of it, unlock the + * map to avoid deadlocks. we trust that the kernel threads are + * well-behaved, and therefore will not do anything destructive to + * this region of the map while we have it unlocked. we cannot + * trust user threads to do the same. + * + * HACK HACK HACK HACK + */ + if (vm_map_pmap(map) == pmap_kernel()) { + vm_map_unlock(map); /* trust me ... 
*/ + } else { + vm_map_set_recursive(&map->lock); + lockmgr(&map->lock, LK_DOWNGRADE, (void *)0, curproc /*XXX*/); + } + + rv = 0; + entry = start_entry; + while (entry != &map->header && entry->start < end) { + /* + * if uvm_fault_wire fails for any page we need to undo what has + * been done. we decrement the wiring count for those pages + * which have not yet been wired (now) and unwire those that + * have * (later). + * + * XXX this violates the locking protocol on the map, needs to + * be fixed. [because we only have a read lock on map we + * shouldn't be changing wired_count?] + */ + if (rv) { + entry->wired_count--; + } else if (entry->wired_count == 1) { + rv = uvm_fault_wire(map, entry->start, entry->end); + if (rv) { + failed = entry->start; + entry->wired_count--; + } + } + entry = entry->next; + } + + if (vm_map_pmap(map) == pmap_kernel()) { + vm_map_lock(map); /* relock */ + } else { + vm_map_clear_recursive(&map->lock); + } + + if (rv) { /* failed? */ + vm_map_unlock(map); + (void) uvm_map_pageable(map, start, failed, TRUE); + UVMHIST_LOG(maphist, "<- done (RV=%d)", rv,0,0,0); + return(rv); + } + vm_map_unlock(map); + + UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0); + return(KERN_SUCCESS); +} + +/* + * uvm_map_clean: push dirty pages off to backing store. + * + * => valid flags: + * if (flags & PGO_SYNCIO): dirty pages are written synchronously + * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean + * if (flags & PGO_FREE): any cached pages are freed after clean + * => returns an error if any part of the specified range isn't mapped + * => never a need to flush amap layer since the anonymous memory has + * no permanent home... + * => called from sys_msync() + * => caller must not write-lock map (read OK). + * => we may sleep while cleaning if SYNCIO [with map read-locked] + * => XXX: does this handle share maps properly? + */ + +int +uvm_map_clean(map, start, end, flags) + vm_map_t map; + vaddr_t start, end; + int flags; +{ + vm_map_entry_t current; + vm_map_entry_t entry; + vsize_t size; + struct uvm_object *object; + vaddr_t offset; + UVMHIST_FUNC("uvm_map_clean"); UVMHIST_CALLED(maphist); + UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,flags=0x%x)", + map, start, end, flags); + + vm_map_lock_read(map); + VM_MAP_RANGE_CHECK(map, start, end); + if (!uvm_map_lookup_entry(map, start, &entry)) { + vm_map_unlock_read(map); + return(KERN_INVALID_ADDRESS); + } + + /* + * Make a first pass to check for holes. + */ + for (current = entry; current->start < end; current = current->next) { + if (UVM_ET_ISSUBMAP(current)) { + vm_map_unlock_read(map); + return(KERN_INVALID_ARGUMENT); + } + if (end > current->end && (current->next == &map->header || + current->end != current->next->start)) { + vm_map_unlock_read(map); + return(KERN_INVALID_ADDRESS); + } + } + + /* + * add "cleanit" flag to flags (for generic flush routine). + * then make a second pass, cleaning/uncaching pages from + * the indicated objects as we go. + */ + flags = flags | PGO_CLEANIT; + for (current = entry; current->start < end; current = current->next) { + offset = current->offset + (start - current->start); + size = (end <= current->end ? end : current->end) - start; + + /* + * get object/offset. can't be submap (checked above). + */ + object = current->object.uvm_obj; + simple_lock(&object->vmobjlock); + + /* + * flush pages if we've got a valid backing object. + * note that object is locked. + * XXX should we continue on an error? 
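uvm_map_pageable above treats wiring as a counted property of each entry: the first pass bumps wired_count across the whole range (rejecting holes), and only entries whose count went from 0 to 1 are faulted in during the second pass; unwiring does real work only when the count drops back to 0. A small sketch of that transition-only pattern, with hypothetical names and no locking shown:

struct region {
    int wired_count;                 /* how many wirings cover this region */
};

/* stand-ins for the expensive parts (faulting pages in, releasing them) */
void fault_in_pages(struct region *r) { (void)r; }
void release_pages(struct region *r)  { (void)r; }

void
wire_region(struct region *r)
{
    if (r->wired_count++ == 0)       /* 0 -> 1: actually wire the pages */
        fault_in_pages(r);
}

void
unwire_region(struct region *r)
{
    if (--r->wired_count == 0)       /* 1 -> 0: actually unwire */
        release_pages(r);
}

Counting first and faulting second is what lets the routine roll back cleanly when it finds a hole part-way through the range.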
+ */ + + if (object && object->pgops) { + if (!object->pgops->pgo_flush(object, offset, + offset+size, flags)) { + simple_unlock(&object->vmobjlock); + vm_map_unlock_read(map); + return (KERN_FAILURE); + } + } + simple_unlock(&object->vmobjlock); + start += size; + } + vm_map_unlock_read(map); + return(KERN_SUCCESS); +} + + +/* + * uvm_map_checkprot: check protection in map + * + * => must allow specified protection in a fully allocated region. + * => map must be read or write locked by caller. + */ + +boolean_t +uvm_map_checkprot(map, start, end, protection) + vm_map_t map; + vaddr_t start, end; + vm_prot_t protection; +{ + vm_map_entry_t entry; + vm_map_entry_t tmp_entry; + + if (!uvm_map_lookup_entry(map, start, &tmp_entry)) { + return(FALSE); + } + + entry = tmp_entry; + + while (start < end) { + if (entry == &map->header) { + return(FALSE); + } + + /* + * no holes allowed + */ + + if (start < entry->start) { + return(FALSE); + } + + /* + * check protection associated with entry + */ + + if ((entry->protection & protection) != protection) { + return(FALSE); + } + + /* go to next entry */ + + start = entry->end; + entry = entry->next; + } + return(TRUE); +} + +/* + * uvmspace_alloc: allocate a vmspace structure. + * + * - structure includes vm_map and pmap + * - XXX: no locking on this structure + * - refcnt set to 1, rest must be init'd by caller + */ +struct vmspace * +uvmspace_alloc(min, max, pageable) + vaddr_t min, max; + int pageable; +{ + struct vmspace *vm; + UVMHIST_FUNC("uvmspace_alloc"); UVMHIST_CALLED(maphist); + + vm = pool_get(&uvm_vmspace_pool, PR_WAITOK); + uvmspace_init(vm, NULL, min, max, pageable); + UVMHIST_LOG(maphist,"<- done (vm=0x%x)", vm,0,0,0); + return (vm); +} + +/* + * uvmspace_init: initialize a vmspace structure. + * + * - XXX: no locking on this structure + * - refcnt set to 1, rest must me init'd by caller + */ +void +uvmspace_init(vm, pmap, min, max, pageable) + struct vmspace *vm; + struct pmap *pmap; + vaddr_t min, max; + boolean_t pageable; +{ + UVMHIST_FUNC("uvmspace_init"); UVMHIST_CALLED(maphist); + + bzero(vm, sizeof(*vm)); + + uvm_map_setup(&vm->vm_map, min, max, pageable); + + if (pmap) + pmap_reference(pmap); + else +#if defined(PMAP_NEW) + pmap = pmap_create(); +#else + pmap = pmap_create(0); +#endif + vm->vm_map.pmap = pmap; + + vm->vm_refcnt = 1; + UVMHIST_LOG(maphist,"<- done",0,0,0,0); +} + +/* + * uvmspace_share: share a vmspace between two proceses + * + * - XXX: no locking on vmspace + * - used for vfork, threads(?) 
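uvm_map_checkprot above walks the sorted entry list, failing on any hole in [start, end) and on any entry that lacks the requested protection bits. The same walk over a plain sorted array of extents, as a standalone sketch with made-up names:

#include <stddef.h>

struct extent {
    unsigned long start, end;        /* half-open range [start, end) */
    unsigned prot;                   /* protection bits granted */
};

/*
 * Return 1 if [start, end) is fully covered by the sorted, non-overlapping
 * extents and every covering extent grants all bits in "prot".
 */
int
range_has_prot(const struct extent *tab, size_t n,
    unsigned long start, unsigned long end, unsigned prot)
{
    size_t i;

    for (i = 0; i < n && start < end; i++) {
        if (tab[i].end <= start)
            continue;                /* entirely before the range */
        if (tab[i].start > start)
            return 0;                /* hole at "start" */
        if ((tab[i].prot & prot) != prot)
            return 0;                /* insufficient protection */
        start = tab[i].end;          /* advance past this extent */
    }
    return start >= end;             /* covered the whole range? */
}

The cursor only ever advances to the end of whichever extent covered it, so a single pass over the list is enough.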
+ */ + +void +uvmspace_share(p1, p2) + struct proc *p1, *p2; +{ + p2->p_vmspace = p1->p_vmspace; + p1->p_vmspace->vm_refcnt++; +} + +/* + * uvmspace_unshare: ensure that process "p" has its own, unshared, vmspace + * + * - XXX: no locking on vmspace + */ + +void +uvmspace_unshare(p) + struct proc *p; +{ + struct vmspace *nvm, *ovm = p->p_vmspace; + int s; + + if (ovm->vm_refcnt == 1) + /* nothing to do: vmspace isn't shared in the first place */ + return; + + /* make a new vmspace, still holding old one */ + nvm = uvmspace_fork(ovm); + + s = splhigh(); /* make this `atomic' */ + pmap_deactivate(p); + /* unbind old vmspace */ + p->p_vmspace = nvm; + pmap_activate(p); + /* switch to new vmspace */ + splx(s); /* end of critical section */ + + uvmspace_free(ovm); /* drop reference to old vmspace */ +} + +/* + * uvmspace_exec: the process wants to exec a new program + * + * - XXX: no locking on vmspace + */ + +void +uvmspace_exec(p) + struct proc *p; +{ + struct vmspace *nvm, *ovm = p->p_vmspace; + vm_map_t map = &ovm->vm_map; + int s; + +#ifdef sparc + /* XXX cgd 960926: the sparc #ifdef should be a MD hook */ + kill_user_windows(p); /* before stack addresses go away */ +#endif + + /* + * see if more than one process is using this vmspace... + */ + + if (ovm->vm_refcnt == 1) { + + /* + * if p is the only process using its vmspace then we can safely + * recycle that vmspace for the program that is being exec'd. + */ + +#ifdef SYSVSHM + /* + * SYSV SHM semantics require us to kill all segments on an exec + */ + if (ovm->vm_shm) + shmexit(ovm); +#endif + + /* + * now unmap the old program + */ + uvm_unmap(map, VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS); + + } else { + + /* + * p's vmspace is being shared, so we can't reuse it for p since + * it is still being used for others. allocate a new vmspace + * for p + */ + nvm = uvmspace_alloc(map->min_offset, map->max_offset, + map->entries_pageable); + +#if (defined(i386) || defined(pc532)) && !defined(PMAP_NEW) + /* + * allocate zero fill area in the new vmspace's map for user + * page tables for ports that have old style pmaps that keep + * user page tables in the top part of the process' address + * space. + * + * XXXCDC: this should go away once all pmaps are fixed + */ + { + vaddr_t addr = VM_MAXUSER_ADDRESS; + if (uvm_map(&nvm->vm_map, &addr, VM_MAX_ADDRESS - addr, + NULL, UVM_UNKNOWN_OFFSET, UVM_MAPFLAG(UVM_PROT_ALL, + UVM_PROT_ALL, UVM_INH_NONE, UVM_ADV_NORMAL, + UVM_FLAG_FIXED|UVM_FLAG_COPYONW)) != KERN_SUCCESS) + panic("vm_allocate of PT page area failed"); + } +#endif + + /* + * install new vmspace and drop our ref to the old one. + */ + + s = splhigh(); + pmap_deactivate(p); + p->p_vmspace = nvm; + pmap_activate(p); + splx(s); + + uvmspace_free(ovm); + } +} + +/* + * uvmspace_free: free a vmspace data structure + * + * - XXX: no locking on vmspace + */ + +void +uvmspace_free(vm) + struct vmspace *vm; +{ + vm_map_entry_t dead_entries; + UVMHIST_FUNC("uvmspace_free"); UVMHIST_CALLED(maphist); + + UVMHIST_LOG(maphist,"(vm=0x%x) ref=%d", vm, vm->vm_refcnt,0,0); + if (--vm->vm_refcnt == 0) { + /* + * lock the map, to wait out all other references to it. delete + * all of the mappings and pages they hold, then call the pmap + * module to reclaim anything left. 
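The vmspace routines above hang off a plain reference count: uvmspace_share bumps vm_refcnt, uvmspace_unshare and uvmspace_exec only build a private or fresh space when the count shows other users, and uvmspace_free tears everything down when the last reference goes. A compact userland sketch of the exec-style "recycle or replace" decision, with hypothetical as_* names and malloc standing in for the pool allocator:

#include <stdlib.h>

struct addrspace {
    int refcnt;                      /* processes using this address space */
};

struct addrspace *
as_create(void)
{
    struct addrspace *as = calloc(1, sizeof(*as));

    if (as != NULL)
        as->refcnt = 1;
    return as;
}

void
as_drop(struct addrspace *as)
{
    if (--as->refcnt == 0)
        free(as);                    /* last user: tear it down */
}

/*
 * exec-style replacement: reuse the space when we are its only user,
 * otherwise switch to a fresh one and drop our reference to the old.
 */
struct addrspace *
as_exec(struct addrspace *old)
{
    struct addrspace *fresh;

    if (old->refcnt == 1)
        return old;                  /* recycle in place (just unmap it) */
    fresh = as_create();
    if (fresh == NULL)
        return old;                  /* allocation failed: keep the old one */
    as_drop(old);
    return fresh;
}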
+ */ + vm_map_lock(&vm->vm_map); + if (vm->vm_map.nentries) { + (void)uvm_unmap_remove(&vm->vm_map, + vm->vm_map.min_offset, vm->vm_map.max_offset, + &dead_entries); + if (dead_entries != NULL) + uvm_unmap_detach(dead_entries, 0); + } + pmap_destroy(vm->vm_map.pmap); + vm->vm_map.pmap = NULL; + pool_put(&uvm_vmspace_pool, vm); + } + UVMHIST_LOG(maphist,"<- done", 0,0,0,0); +} + +/* + * F O R K - m a i n e n t r y p o i n t + */ +/* + * uvmspace_fork: fork a process' main map + * + * => create a new vmspace for child process from parent. + * => parent's map must not be locked. + */ + +struct vmspace * +uvmspace_fork(vm1) + struct vmspace *vm1; +{ + struct vmspace *vm2; + vm_map_t old_map = &vm1->vm_map; + vm_map_t new_map; + vm_map_entry_t old_entry; + vm_map_entry_t new_entry; + pmap_t new_pmap; + boolean_t protect_child; + UVMHIST_FUNC("uvmspace_fork"); UVMHIST_CALLED(maphist); + +#if (defined(i386) || defined(pc532)) && !defined(PMAP_NEW) + /* + * avoid copying any of the parent's pagetables or other per-process + * objects that reside in the map by marking all of them non-inheritable + * XXXCDC: should go away + */ + (void) uvm_map_inherit(old_map, VM_MAXUSER_ADDRESS, VM_MAX_ADDRESS, + VM_INHERIT_NONE); +#endif + + vm_map_lock(old_map); + + vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset, + old_map->entries_pageable); + bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy, + (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); + new_map = &vm2->vm_map; /* XXX */ + new_pmap = new_map->pmap; + + old_entry = old_map->header.next; + + /* + * go entry-by-entry + */ + + while (old_entry != &old_map->header) { + + /* + * first, some sanity checks on the old entry + */ + if (UVM_ET_ISSUBMAP(old_entry)) + panic("fork: encountered a submap during fork (illegal)"); + + if (!UVM_ET_ISCOPYONWRITE(old_entry) && + UVM_ET_ISNEEDSCOPY(old_entry)) + panic("fork: non-copy_on_write map entry marked needs_copy (illegal)"); + + + switch (old_entry->inheritance) { + case VM_INHERIT_NONE: + /* + * drop the mapping + */ + break; + + case VM_INHERIT_SHARE: + /* + * share the mapping: this means we want the old and + * new entries to share amaps and backing objects. + */ + + /* + * if the old_entry needs a new amap (due to prev fork) + * then we need to allocate it now so that we have + * something we own to share with the new_entry. [in + * other words, we need to clear needs_copy] + */ + + if (UVM_ET_ISNEEDSCOPY(old_entry)) { + /* get our own amap, clears needs_copy */ + amap_copy(old_map, old_entry, M_WAITOK, FALSE, + 0, 0); + /* XXXCDC: WAITOK??? */ + } + + new_entry = uvm_mapent_alloc(new_map); + /* old_entry -> new_entry */ + uvm_mapent_copy(old_entry, new_entry); + + /* new pmap has nothing wired in it */ + new_entry->wired_count = 0; + + /* + * gain reference to object backing the map (can't + * be a submap, already checked this case). + */ + if (new_entry->aref.ar_amap) + /* share reference */ + amap_ref(new_entry, AMAP_SHARED); + + if (new_entry->object.uvm_obj && + new_entry->object.uvm_obj->pgops->pgo_reference) + new_entry->object.uvm_obj-> + pgops->pgo_reference( + new_entry->object.uvm_obj); + + /* insert entry at end of new_map's entry list */ + uvm_map_entry_link(new_map, new_map->header.prev, + new_entry); + + /* + * pmap_copy the mappings: this routine is optional + * but if it is there it will reduce the number of + * page faults in the new proc. 
+ */ + + pmap_copy(new_pmap, old_map->pmap, new_entry->start, + (old_entry->end - old_entry->start), + old_entry->start); + + break; + + case VM_INHERIT_COPY: + + /* + * copy-on-write the mapping (using mmap's + * MAP_PRIVATE semantics) + * + * allocate new_entry, adjust reference counts. + * (note that new references are read-only). + */ + + new_entry = uvm_mapent_alloc(new_map); + /* old_entry -> new_entry */ + uvm_mapent_copy(old_entry, new_entry); + + if (new_entry->aref.ar_amap) + amap_ref(new_entry, 0); + + if (new_entry->object.uvm_obj && + new_entry->object.uvm_obj->pgops->pgo_reference) + new_entry->object.uvm_obj->pgops->pgo_reference + (new_entry->object.uvm_obj); + + /* new pmap has nothing wired in it */ + new_entry->wired_count = 0; + + new_entry->etype |= + (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY); + uvm_map_entry_link(new_map, new_map->header.prev, + new_entry); + + /* + * the new entry will need an amap. it will either + * need to be copied from the old entry or created + * from scratch (if the old entry does not have an + * amap). can we defer this process until later + * (by setting "needs_copy") or do we need to copy + * the amap now? + * + * we must copy the amap now if any of the following + * conditions hold: + * 1. the old entry has an amap and that amap is + * being shared. this means that the old (parent) + * process is sharing the amap with another + * process. if we do not clear needs_copy here + * we will end up in a situation where both the + * parent and child process are refering to the + * same amap with "needs_copy" set. if the + * parent write-faults, the fault routine will + * clear "needs_copy" in the parent by allocating + * a new amap. this is wrong because the + * parent is supposed to be sharing the old amap + * and the new amap will break that. + * + * 2. if the old entry has an amap and a non-zero + * wire count then we are going to have to call + * amap_cow_now to avoid page faults in the + * parent process. since amap_cow_now requires + * "needs_copy" to be clear we might as well + * clear it here as well. + * + */ + + if (old_entry->aref.ar_amap != NULL) { + + if ((amap_flags(old_entry->aref.ar_amap) & + AMAP_SHARED) != 0 || + old_entry->wired_count != 0) { + + amap_copy(new_map, new_entry, M_WAITOK, FALSE, + 0, 0); + /* XXXCDC: M_WAITOK ... ok? */ + } + } + + /* + * if the parent's entry is wired down, then the + * parent process does not want page faults on + * access to that memory. this means that we + * cannot do copy-on-write because we can't write + * protect the old entry. in this case we + * resolve all copy-on-write faults now, using + * amap_cow_now. note that we have already + * allocated any needed amap (above). + */ + + if (old_entry->wired_count != 0) { + + /* + * resolve all copy-on-write faults now + * (note that there is nothing to do if + * the old mapping does not have an amap). + * XXX: is it worthwhile to bother with pmap_copy + * in this case? + */ + if (old_entry->aref.ar_amap) + amap_cow_now(new_map, new_entry); + + } else { + + /* + * setup mappings to trigger copy-on-write faults + * we must write-protect the parent if it has + * an amap and it is not already "needs_copy"... + * if it is already "needs_copy" then the parent + * has already been write-protected by a previous + * fork operation. + * + * if we do not write-protect the parent, then + * we must be sure to write-protect the child + * after the pmap_copy() operation. 
+ * + * XXX: pmap_copy should have some way of telling + * us that it didn't do anything so we can avoid + * calling pmap_protect needlessly. + */ + + if (old_entry->aref.ar_amap) { + + if (!UVM_ET_ISNEEDSCOPY(old_entry)) { + if (old_entry->max_protection & VM_PROT_WRITE) { + pmap_protect(old_map->pmap, + old_entry->start, + old_entry->end, + old_entry->protection & + ~VM_PROT_WRITE); + } + old_entry->etype |= UVM_ET_NEEDSCOPY; + } + + /* + * parent must now be write-protected + */ + protect_child = FALSE; + } else { + + /* + * we only need to protect the child if the + * parent has write access. + */ + if (old_entry->max_protection & VM_PROT_WRITE) + protect_child = TRUE; + else + protect_child = FALSE; + + } + + /* + * copy the mappings + * XXX: need a way to tell if this does anything + */ + + pmap_copy(new_pmap, old_map->pmap, + new_entry->start, + (old_entry->end - old_entry->start), + old_entry->start); + + /* + * protect the child's mappings if necessary + */ + if (protect_child) { + pmap_protect(new_pmap, new_entry->start, + new_entry->end, + new_entry->protection & + ~VM_PROT_WRITE); + } + + } + break; + } /* end of switch statement */ + old_entry = old_entry->next; + } + + new_map->size = old_map->size; + vm_map_unlock(old_map); + +#if (defined(i386) || defined(pc532)) && !defined(PMAP_NEW) + /* + * allocate zero fill area in the new vmspace's map for user + * page tables for ports that have old style pmaps that keep + * user page tables in the top part of the process' address + * space. + * + * XXXCDC: this should go away once all pmaps are fixed + */ + { + vaddr_t addr = VM_MAXUSER_ADDRESS; + if (uvm_map(new_map, &addr, VM_MAX_ADDRESS - addr, NULL, + UVM_UNKNOWN_OFFSET, UVM_MAPFLAG(UVM_PROT_ALL, + UVM_PROT_ALL, UVM_INH_NONE, UVM_ADV_NORMAL, + UVM_FLAG_FIXED|UVM_FLAG_COPYONW)) != KERN_SUCCESS) + panic("vm_allocate of PT page area failed"); + } +#endif + +#ifdef SYSVSHM + if (vm1->vm_shm) + shmfork(vm1, vm2); +#endif + + UVMHIST_LOG(maphist,"<- done",0,0,0,0); + return(vm2); +} + + +#if defined(DDB) + +/* + * DDB hooks + */ + +/* + * uvm_map_print: print out a map + */ + +void +uvm_map_print(map, full) + vm_map_t map; + boolean_t full; +{ + + uvm_map_printit(map, full, printf); +} + +/* + * uvm_map_printit: actually prints the map + */ + +void +uvm_map_printit(map, full, pr) + vm_map_t map; + boolean_t full; + int (*pr) __P((const char *, ...)); +{ + vm_map_entry_t entry; + + (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset); + (*pr)("\t#ent=%d, sz=%d, ref=%d, version=%d\n", + map->nentries, map->size, map->ref_count, map->timestamp); +#ifdef pmap_resident_count + (*pr)("\tpmap=%p(resident=%d)\n", map->pmap, + pmap_resident_count(map->pmap)); +#else + /* XXXCDC: this should be required ... */ + (*pr)("\tpmap=%p(resident=<<NOT SUPPORTED!!!>>)\n", map->pmap); +#endif + if (!full) + return; + for (entry = map->header.next; entry != &map->header; + entry = entry->next) { + (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%x, amap=%p/%d\n", + entry, entry->start, entry->end, entry->object.uvm_obj, + entry->offset, entry->aref.ar_amap, entry->aref.ar_pageoff); + (*pr)( +"\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, wc=%d, adv=%d\n", + (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F', + (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F', + (entry->etype & UVM_ET_NEEDSCOPY) ? 
'T' : 'F', + entry->protection, entry->max_protection, + entry->inheritance, entry->wired_count, entry->advice); + } +} + +/* + * uvm_object_print: print out an object + */ + +void +uvm_object_print(uobj, full) + struct uvm_object *uobj; + boolean_t full; +{ + + uvm_object_printit(uobj, full, printf); +} + +/* + * uvm_object_printit: actually prints the object + */ + +void +uvm_object_printit(uobj, full, pr) + struct uvm_object *uobj; + boolean_t full; + int (*pr) __P((const char *, ...)); +{ + struct vm_page *pg; + int cnt = 0; + + (*pr)("OBJECT %p: pgops=%p, npages=%d, ", uobj, uobj->pgops, + uobj->uo_npages); + if (uobj->uo_refs == UVM_OBJ_KERN) + (*pr)("refs=<SYSTEM>\n"); + else + (*pr)("refs=%d\n", uobj->uo_refs); + + if (!full) return; + (*pr)(" PAGES <pg,offset>:\n "); + for (pg = uobj->memq.tqh_first ; pg ; pg = pg->listq.tqe_next, cnt++) { + (*pr)("<%p,0x%lx> ", pg, pg->offset); + if ((cnt % 3) == 2) (*pr)("\n "); + } + if ((cnt % 3) != 2) (*pr)("\n"); +} + +/* + * uvm_page_print: print out a page + */ + +void +uvm_page_print(pg, full) + struct vm_page *pg; + boolean_t full; +{ + + uvm_page_printit(pg, full, printf); +} + +/* + * uvm_page_printit: actually print the page + */ + +void +uvm_page_printit(pg, full, pr) + struct vm_page *pg; + boolean_t full; + int (*pr) __P((const char *, ...)); +{ + struct vm_page *lcv; + struct uvm_object *uobj; + struct pglist *pgl; + + (*pr)("PAGE %p:\n", pg); + (*pr)(" flags=0x%x, pqflags=0x%x, vers=%d, wire_count=%d, pa=0x%lx\n", + pg->flags, pg->pqflags, pg->version, pg->wire_count, (long)pg->phys_addr); + (*pr)(" uobject=%p, uanon=%p, offset=0x%lx loan_count=%d\n", + pg->uobject, pg->uanon, pg->offset, pg->loan_count); +#if defined(UVM_PAGE_TRKOWN) + if (pg->flags & PG_BUSY) + (*pr)(" owning process = %d, tag=%s\n", + pg->owner, pg->owner_tag); + else + (*pr)(" page not busy, no owner\n"); +#else + (*pr)(" [page ownership tracking disabled]\n"); +#endif + + if (!full) + return; + + /* cross-verify object/anon */ + if ((pg->pqflags & PQ_FREE) == 0) { + if (pg->pqflags & PQ_ANON) { + if (pg->uanon == NULL || pg->uanon->u.an_page != pg) + (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n", + (pg->uanon) ? pg->uanon->u.an_page : NULL); + else + (*pr)(" anon backpointer is OK\n"); + } else { + uobj = pg->uobject; + if (uobj) { + (*pr)(" checking object list\n"); + for (lcv = uobj->memq.tqh_first ; lcv ; + lcv = lcv->listq.tqe_next) { + if (lcv == pg) break; + } + if (lcv) + (*pr)(" page found on object list\n"); + else + (*pr)(" >>> PAGE NOT FOUND ON OBJECT LIST! <<<\n"); + } + } + } + + /* cross-verify page queue */ + if (pg->pqflags & PQ_FREE) + pgl = &uvm.page_free[uvm_page_lookup_freelist(pg)]; + else if (pg->pqflags & PQ_INACTIVE) + pgl = (pg->pqflags & PQ_SWAPBACKED) ? + &uvm.page_inactive_swp : &uvm.page_inactive_obj; + else if (pg->pqflags & PQ_ACTIVE) + pgl = &uvm.page_active; + else + pgl = NULL; + + if (pgl) { + (*pr)(" checking pageq list\n"); + for (lcv = pgl->tqh_first ; lcv ; lcv = lcv->pageq.tqe_next) { + if (lcv == pg) break; + } + if (lcv) + (*pr)(" page found on pageq list\n"); + else + (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n"); + } +} +#endif diff --git a/sys/uvm/uvm_map.h b/sys/uvm/uvm_map.h new file mode 100644 index 00000000000..4c10b5222d1 --- /dev/null +++ b/sys/uvm/uvm_map.h @@ -0,0 +1,166 @@ +/* $NetBSD: uvm_map.h,v 1.10 1998/10/11 23:14:48 chuck Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * Copyright (c) 1997 Charles D. 
Cranor and Washington University. + * Copyright (c) 1991, 1993, The Regents of the University of California. + * + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor, + * Washington University, the University of California, Berkeley and + * its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)vm_map.h 8.3 (Berkeley) 3/15/94 + * from: Id: uvm_map.h,v 1.1.2.3 1998/02/07 01:16:55 chs Exp + * + * + * Copyright (c) 1987, 1990 Carnegie-Mellon University. + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and + * its documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie the + * rights to redistribute these changes. + */ + +#ifndef _UVM_UVM_MAP_H_ +#define _UVM_UVM_MAP_H_ + +/* + * uvm_map.h + */ + +/* + * macros + */ + +/* + * UVM_MAP_CLIP_START: ensure that the entry begins at or after + * the starting address, if it doesn't we split the entry. 
+ * + * => map must be locked by caller + */ + +#define UVM_MAP_CLIP_START(MAP,ENTRY,VA) { \ + if ((VA) > (ENTRY)->start) uvm_map_clip_start(MAP,ENTRY,VA); } + +/* + * UVM_MAP_CLIP_END: ensure that the entry ends at or before + * the ending address, if it does't we split the entry. + * + * => map must be locked by caller + */ + +#define UVM_MAP_CLIP_END(MAP,ENTRY,VA) { \ + if ((VA) < (ENTRY)->end) uvm_map_clip_end(MAP,ENTRY,VA); } + +/* + * extract flags + */ +#define UVM_EXTRACT_REMOVE 0x1 /* remove mapping from old map */ +#define UVM_EXTRACT_CONTIG 0x2 /* try to keep it contig */ +#define UVM_EXTRACT_QREF 0x4 /* use quick refs */ +#define UVM_EXTRACT_FIXPROT 0x8 /* set prot to maxprot as we go */ + + +/* + * handle inline options + */ + +#ifdef UVM_MAP_INLINE +#define MAP_INLINE static __inline +#else +#define MAP_INLINE /* nothing */ +#endif /* UVM_MAP_INLINE */ + +/* + * protos: the following prototypes define the interface to vm_map + */ + +MAP_INLINE +void uvm_map_deallocate __P((vm_map_t)); + +int uvm_map_clean __P((vm_map_t, vaddr_t, vaddr_t, int)); +void uvm_map_clip_start __P((vm_map_t, + vm_map_entry_t, vaddr_t)); +void uvm_map_clip_end __P((vm_map_t, vm_map_entry_t, + vaddr_t)); +MAP_INLINE +vm_map_t uvm_map_create __P((pmap_t, vaddr_t, + vaddr_t, boolean_t)); +int uvm_map_extract __P((vm_map_t, vaddr_t, vsize_t, + vm_map_t, vaddr_t *, int)); +vm_map_entry_t uvm_map_findspace __P((vm_map_t, vaddr_t, vsize_t, + vaddr_t *, struct uvm_object *, vaddr_t, + boolean_t)); +int uvm_map_inherit __P((vm_map_t, vaddr_t, vaddr_t, + vm_inherit_t)); +void uvm_map_init __P((void)); +boolean_t uvm_map_lookup_entry __P((vm_map_t, vaddr_t, + vm_map_entry_t *)); +MAP_INLINE +void uvm_map_reference __P((vm_map_t)); +int uvm_map_replace __P((vm_map_t, vaddr_t, vaddr_t, + vm_map_entry_t, int)); +int uvm_map_reserve __P((vm_map_t, vsize_t, vaddr_t, + vaddr_t *)); +void uvm_map_setup __P((vm_map_t, vaddr_t, + vaddr_t, boolean_t)); +int uvm_map_submap __P((vm_map_t, vaddr_t, + vaddr_t, vm_map_t)); +MAP_INLINE +int uvm_unmap __P((vm_map_t, vaddr_t, vaddr_t)); +void uvm_unmap_detach __P((vm_map_entry_t,int)); +int uvm_unmap_remove __P((vm_map_t, vaddr_t, vaddr_t, + vm_map_entry_t *)); + +struct vmspace *uvmspace_fork __P((struct vmspace *)); + +#endif /* _UVM_UVM_MAP_H_ */ diff --git a/sys/uvm/uvm_map_i.h b/sys/uvm/uvm_map_i.h new file mode 100644 index 00000000000..56842e191b6 --- /dev/null +++ b/sys/uvm/uvm_map_i.h @@ -0,0 +1,243 @@ +/* $NetBSD: uvm_map_i.h,v 1.10 1998/10/11 23:14:48 chuck Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * Copyright (c) 1991, 1993, The Regents of the University of California. + * + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor, + * Washington University, the University of California, Berkeley and + * its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)vm_map.c 8.3 (Berkeley) 1/12/94 + * from: Id: uvm_map_i.h,v 1.1.2.1 1997/08/14 19:10:50 chuck Exp + * + * + * Copyright (c) 1987, 1990 Carnegie-Mellon University. + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and + * its documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie the + * rights to redistribute these changes. + */ + +#ifndef _UVM_UVM_MAP_I_H_ +#define _UVM_UVM_MAP_I_H_ + +/* + * uvm_map_i.h + */ + +/* + * inline functions [maybe] + */ + +#if defined(UVM_MAP_INLINE) || defined(UVM_MAP) + +/* + * uvm_map_create: create map + */ + +MAP_INLINE vm_map_t +uvm_map_create(pmap, min, max, pageable) + pmap_t pmap; + vaddr_t min, max; + boolean_t pageable; +{ + vm_map_t result; + + MALLOC(result, vm_map_t, sizeof(struct vm_map), M_VMMAP, M_WAITOK); + uvm_map_setup(result, min, max, pageable); + result->pmap = pmap; + return(result); +} + +/* + * uvm_map_setup: init map + * + * => map must not be in service yet. 
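uvm_map_setup, whose body follows, starts the entry list as a header that points at itself (map->header.next = map->header.prev = &map->header), the standard empty circular doubly-linked list; first_free and hint begin at the header as well. A minimal standalone version of that sentinel idiom, with generic names rather than the UVM types:

struct entry {
    struct entry *prev, *next;
};

/* an empty list is just the sentinel pointing at itself */
void
list_init(struct entry *header)
{
    header->next = header->prev = header;
}

int
list_empty(const struct entry *header)
{
    return header->next == header;
}

/* link "e" right after "after"; the sentinel keeps this branch-free */
void
list_insert_after(struct entry *after, struct entry *e)
{
    e->prev = after;
    e->next = after->next;
    after->next->prev = e;
    after->next = e;
}

Because the sentinel is always there, insertion and removal need no NULL checks, and the map code can use &map->header as the "one past the last entry" marker it tests against everywhere.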
+ */ + +MAP_INLINE void +uvm_map_setup(map, min, max, pageable) + vm_map_t map; + vaddr_t min, max; + boolean_t pageable; +{ + + map->header.next = map->header.prev = &map->header; + map->nentries = 0; + map->size = 0; + map->ref_count = 1; + map->min_offset = min; + map->max_offset = max; + map->entries_pageable = pageable; + map->first_free = &map->header; + map->hint = &map->header; + map->timestamp = 0; + lockinit(&map->lock, PVM, "thrd_sleep", 0, 0); + simple_lock_init(&map->ref_lock); + simple_lock_init(&map->hint_lock); +} + + +/* + * U N M A P - m a i n e n t r y p o i n t + */ + +/* + * uvm_unmap: remove mappings from a vm_map (from "start" up to "stop") + * + * => caller must check alignment and size + * => map must be unlocked (we will lock it) + * => if the "start"/"stop" range lie within a mapping of a share map, + * then the unmap takes place within the context of that share map + * rather than in the main map, unless the "mainonly" flag is set. + * (e.g. the "exit" system call would want to set "mainonly"). + */ + +MAP_INLINE int +uvm_unmap(map, start, end) + vm_map_t map; + vaddr_t start,end; +{ + int result; + vm_map_entry_t dead_entries; + UVMHIST_FUNC("uvm_unmap"); UVMHIST_CALLED(maphist); + + UVMHIST_LOG(maphist, " (map=0x%x, start=0x%x, end=0x%x)", + map, start, end, 0); + /* + * work now done by helper functions. wipe the pmap's and then + * detach from the dead entries... + */ + vm_map_lock(map); + result = uvm_unmap_remove(map, start, end, &dead_entries); + vm_map_unlock(map); + + if (dead_entries != NULL) + uvm_unmap_detach(dead_entries, 0); + + UVMHIST_LOG(maphist, "<- done", 0,0,0,0); + return(result); +} + + +/* + * uvm_map_reference: add reference to a map + * + * => map need not be locked (we use ref_lock). + */ + +MAP_INLINE void +uvm_map_reference(map) + vm_map_t map; +{ + if (map == NULL) { +#ifdef DIAGNOSTIC + printf("uvm_map_reference: reference to NULL map\n"); +#ifdef DDB + Debugger(); +#endif +#endif + return; + } + + simple_lock(&map->ref_lock); + map->ref_count++; + simple_unlock(&map->ref_lock); +} + +/* + * uvm_map_deallocate: drop reference to a map + * + * => caller must not lock map + * => we will zap map if ref count goes to zero + */ + +MAP_INLINE void +uvm_map_deallocate(map) + vm_map_t map; +{ + int c; + + if (map == NULL) { +#ifdef DIAGNOSTIC + printf("uvm_map_deallocate: reference to NULL map\n"); +#ifdef DDB + Debugger(); +#endif +#endif + return; + } + + simple_lock(&map->ref_lock); + c = --map->ref_count; + simple_unlock(&map->ref_lock); + + if (c > 0) { + return; + } + + /* + * all references gone. unmap and free. + */ + + uvm_unmap(map, map->min_offset, map->max_offset); + pmap_destroy(map->pmap); + + FREE(map, M_VMMAP); +} + +#endif /* defined(UVM_MAP_INLINE) || defined(UVM_MAP) */ + +#endif /* _UVM_UVM_MAP_I_H_ */ diff --git a/sys/uvm/uvm_meter.c b/sys/uvm/uvm_meter.c new file mode 100644 index 00000000000..e064a087e64 --- /dev/null +++ b/sys/uvm/uvm_meter.c @@ -0,0 +1,246 @@ +/* $NetBSD: uvm_meter.c,v 1.7 1998/08/09 22:36:39 perry Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor, + * Washington University, and the University of California, Berkeley + * and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)vm_meter.c 8.4 (Berkeley) 1/4/94 + * from: Id: uvm_meter.c,v 1.1.2.1 1997/08/14 19:10:35 chuck Exp + */ + +#include <sys/param.h> +#include <sys/proc.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <vm/vm.h> +#include <sys/sysctl.h> +#include <sys/exec.h> + +/* + * maxslp: ???? XXXCDC + */ + +int maxslp = MAXSLP; /* patchable ... */ +struct loadavg averunnable; /* decl. */ + +/* + * constants for averages over 1, 5, and 15 minutes when sampling at + * 5 second intervals. + */ + +static fixpt_t cexp[3] = { + 0.9200444146293232 * FSCALE, /* exp(-1/12) */ + 0.9834714538216174 * FSCALE, /* exp(-1/60) */ + 0.9944598480048967 * FSCALE, /* exp(-1/180) */ +}; + +/* + * prototypes + */ + +static void uvm_loadav __P((struct loadavg *)); + +/* + * uvm_meter: calculate load average and wake up the swapper (if needed) + */ +void +uvm_meter() +{ + if ((time.tv_sec % 5) == 0) + uvm_loadav(&averunnable); + if (proc0.p_slptime > (maxslp / 2)) + wakeup((caddr_t)&proc0); +} + +/* + * uvm_loadav: compute a tenex style load average of a quantity on + * 1, 5, and 15 minute internvals. + */ +static void +uvm_loadav(avg) + struct loadavg *avg; +{ + int i, nrun; + struct proc *p; + + for (nrun = 0, p = allproc.lh_first; p != 0; p = p->p_list.le_next) { + switch (p->p_stat) { + case SSLEEP: + if (p->p_priority > PZERO || p->p_slptime > 1) + continue; + /* fall through */ + case SRUN: + case SIDL: + nrun++; + } + } + for (i = 0; i < 3; i++) + avg->ldavg[i] = (cexp[i] * avg->ldavg[i] + + nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT; +} + +/* + * uvm_sysctl: sysctl hook into UVM system. 
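Before the sysctl hook, the recurrence used by uvm_loadav() above deserves a standalone illustration: each average decays by a precomputed factor of exp(-interval/window) and the current run-queue length is blended in, all in fixed point. The sketch below assumes FSHIFT is 11 (the customary BSD value; param.h is not part of this hunk) and reuses the three scaled constants from this file.

#include <stdio.h>

#define FSHIFT	11			/* assumed; see sys/param.h */
#define FSCALE	(1 << FSHIFT)

typedef long fixpt_t;

/* exp(-1/12), exp(-1/60), exp(-1/180), scaled, as in uvm_meter.c */
static const fixpt_t cexp[3] = {
	(fixpt_t)(0.9200444146293232 * FSCALE),
	(fixpt_t)(0.9834714538216174 * FSCALE),
	(fixpt_t)(0.9944598480048967 * FSCALE),
};

static void
loadav_update(fixpt_t ldavg[3], int nrun)
{
	int i;

	/* avg = decay * avg + (1 - decay) * nrun, all scaled by FSCALE */
	for (i = 0; i < 3; i++)
		ldavg[i] = (cexp[i] * ldavg[i] +
		    nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT;
}

int
main(void)
{
	fixpt_t ldavg[3] = { 0, 0, 0 };
	int tick;

	/* one minute of 5-second samples with three runnable processes */
	for (tick = 0; tick < 12; tick++)
		loadav_update(ldavg, 3);
	printf("1min %.2f  5min %.2f  15min %.2f\n",
	    (double)ldavg[0] / FSCALE,
	    (double)ldavg[1] / FSCALE,
	    (double)ldavg[2] / FSCALE);
	return 0;
}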
+ */ +int +uvm_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) + int *name; + u_int namelen; + void *oldp; + size_t *oldlenp; + void *newp; + size_t newlen; + struct proc *p; +{ + struct vmtotal vmtotals; + struct _ps_strings _ps = { PS_STRINGS }; + + /* all sysctl names at this level are terminal */ + if (namelen != 1) + return (ENOTDIR); /* overloaded */ + + switch (name[0]) { + case VM_LOADAVG: + return (sysctl_rdstruct(oldp, oldlenp, newp, &averunnable, + sizeof(averunnable))); + + case VM_METER: + uvm_total(&vmtotals); + return (sysctl_rdstruct(oldp, oldlenp, newp, &vmtotals, + sizeof(vmtotals))); + + case VM_UVMEXP: + return (sysctl_rdstruct(oldp, oldlenp, newp, &uvmexp, + sizeof(uvmexp))); + + case VM_PSSTRINGS: + return (sysctl_rdstruct(oldp, oldlenp, newp, &_ps, + sizeof(_ps))); + + default: + return (EOPNOTSUPP); + } + /* NOTREACHED */ +} + +/* + * uvm_total: calculate the current state of the system. + */ +void +uvm_total(totalp) + struct vmtotal *totalp; +{ + struct proc *p; +#if 0 + vm_map_entry_t entry; + vm_map_t map; + int paging; +#endif + + bzero(totalp, sizeof *totalp); + + /* + * calculate process statistics + */ + + for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { + if (p->p_flag & P_SYSTEM) + continue; + switch (p->p_stat) { + case 0: + continue; + + case SSLEEP: + case SSTOP: + if (p->p_flag & P_INMEM) { + if (p->p_priority <= PZERO) + totalp->t_dw++; + else if (p->p_slptime < maxslp) + totalp->t_sl++; + } else if (p->p_slptime < maxslp) + totalp->t_sw++; + if (p->p_slptime >= maxslp) + continue; + break; + + case SRUN: + case SIDL: + if (p->p_flag & P_INMEM) + totalp->t_rq++; + else + totalp->t_sw++; + if (p->p_stat == SIDL) + continue; + break; + } + /* + * note active objects + */ +#if 0 + /* + * XXXCDC: BOGUS! rethink this. in the mean time + * don't do it. + */ + paging = 0; + vm_map_lock(map); + for (map = &p->p_vmspace->vm_map, entry = map->header.next; + entry != &map->header; entry = entry->next) { + if (entry->is_a_map || entry->is_sub_map || + entry->object.uvm_obj == NULL) + continue; + /* XXX how to do this with uvm */ + } + vm_map_unlock(map); + if (paging) + totalp->t_pw++; +#endif + } + /* + * Calculate object memory usage statistics. + */ + totalp->t_free = uvmexp.free; + totalp->t_vm = uvmexp.npages - uvmexp.free + uvmexp.swpginuse; + totalp->t_avm = uvmexp.active + uvmexp.swpginuse; /* XXX */ + totalp->t_rm = uvmexp.npages - uvmexp.free; + totalp->t_arm = uvmexp.active; + totalp->t_vmshr = 0; /* XXX */ + totalp->t_avmshr = 0; /* XXX */ + totalp->t_rmshr = 0; /* XXX */ + totalp->t_armshr = 0; /* XXX */ +} diff --git a/sys/uvm/uvm_mmap.c b/sys/uvm/uvm_mmap.c new file mode 100644 index 00000000000..66724213c55 --- /dev/null +++ b/sys/uvm/uvm_mmap.c @@ -0,0 +1,963 @@ +/* $NetBSD: uvm_mmap.c,v 1.15 1998/10/11 23:18:20 chuck Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * Copyright (c) 1991, 1993 The Regents of the University of California. + * Copyright (c) 1988 University of Utah. + * + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the Charles D. Cranor, + * Washington University, University of California, Berkeley and + * its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$ + * @(#)vm_mmap.c 8.5 (Berkeley) 5/19/94 + * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp + */ + +/* + * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap + * function. + */ +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/file.h> +#include <sys/filedesc.h> +#include <sys/resourcevar.h> +#include <sys/mman.h> +#include <sys/mount.h> +#include <sys/proc.h> +#include <sys/malloc.h> +#include <sys/vnode.h> +#include <sys/conf.h> +#include <sys/stat.h> + +#include <miscfs/specfs/specdev.h> + +#include <vm/vm.h> +#include <vm/vm_page.h> +#include <vm/vm_kern.h> + +#include <sys/syscallargs.h> + +#include <uvm/uvm.h> +#include <uvm/uvm_device.h> +#include <uvm/uvm_vnode.h> + + +/* + * unimplemented VM system calls: + */ + +/* + * sys_sbrk: sbrk system call. + */ + +/* ARGSUSED */ +int +sys_sbrk(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ +#if 0 + struct sys_sbrk_args /* { + syscallarg(int) incr; + } */ *uap = v; +#endif + + return (EOPNOTSUPP); +} + +/* + * sys_sstk: sstk system call. + */ + +/* ARGSUSED */ +int +sys_sstk(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ +#if 0 + struct sys_sstk_args /* { + syscallarg(int) incr; + } */ *uap = v; +#endif + + return (EOPNOTSUPP); +} + +/* + * sys_madvise: give advice about memory usage. + */ + +/* ARGSUSED */ +int +sys_madvise(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ +#if 0 + struct sys_madvise_args /* { + syscallarg(caddr_t) addr; + syscallarg(size_t) len; + syscallarg(int) behav; + } */ *uap = v; +#endif + + return (EOPNOTSUPP); +} + +/* + * sys_mincore: determine if pages are in core or not. 
+ */ + +/* ARGSUSED */ +int +sys_mincore(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ +#if 0 + struct sys_mincore_args /* { + syscallarg(caddr_t) addr; + syscallarg(size_t) len; + syscallarg(char *) vec; + } */ *uap = v; +#endif + + return (EOPNOTSUPP); +} + +#if 0 +/* + * munmapfd: unmap file descriptor + * + * XXX: is this acutally a useful function? could it be useful? + */ + +void +munmapfd(p, fd) + struct proc *p; + int fd; +{ + + /* + * XXX should vm_deallocate any regions mapped to this file + */ + p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED; +} +#endif + +/* + * sys_mmap: mmap system call. + * + * => file offest and address may not be page aligned + * - if MAP_FIXED, offset and address must have remainder mod PAGE_SIZE + * - if address isn't page aligned the mapping starts at trunc_page(addr) + * and the return value is adjusted up by the page offset. + */ + +int +sys_mmap(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + register struct sys_mmap_args /* { + syscallarg(caddr_t) addr; + syscallarg(size_t) len; + syscallarg(int) prot; + syscallarg(int) flags; + syscallarg(int) fd; + syscallarg(long) pad; + syscallarg(off_t) pos; + } */ *uap = v; + vaddr_t addr; + struct vattr va; + off_t pos; + vsize_t size, pageoff; + vm_prot_t prot, maxprot; + int flags, fd; + vaddr_t vm_min_address = VM_MIN_ADDRESS; + register struct filedesc *fdp = p->p_fd; + register struct file *fp; + struct vnode *vp; + caddr_t handle; + int error; + + /* + * first, extract syscall args from the uap. + */ + + addr = (vaddr_t) SCARG(uap, addr); + size = (vsize_t) SCARG(uap, len); + prot = SCARG(uap, prot) & VM_PROT_ALL; + flags = SCARG(uap, flags); + fd = SCARG(uap, fd); + pos = SCARG(uap, pos); + + /* + * make sure that the newsize fits within a vaddr_t + * XXX: need to revise addressing data types + */ + if (pos + size > (vaddr_t)-PAGE_SIZE) { +#ifdef DEBUG + printf("mmap: pos=%qx, size=%x too big\n", pos, (int)size); +#endif + return (EINVAL); + } + + /* + * align file position and save offset. adjust size. + */ + + pageoff = (pos & PAGE_MASK); + pos -= pageoff; + size += pageoff; /* add offset */ + size = (vsize_t) round_page(size); /* round up */ + if ((ssize_t) size < 0) + return (EINVAL); /* don't allow wrap */ + + /* + * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr" + */ + + if (flags & MAP_FIXED) { + + /* ensure address and file offset are aligned properly */ + addr -= pageoff; + if (addr & PAGE_MASK) + return (EINVAL); + + if (VM_MAXUSER_ADDRESS > 0 && + (addr + size) > VM_MAXUSER_ADDRESS) + return (EINVAL); + if (vm_min_address > 0 && addr < vm_min_address) + return (EINVAL); + if (addr > addr + size) + return (EINVAL); /* no wrapping! */ + + } else { + + /* + * not fixed: make sure we skip over the largest possible heap. + * we will refine our guess later (e.g. to account for VAC, etc) + */ + if (addr < round_page(p->p_vmspace->vm_daddr + MAXDSIZ)) + addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ); + } + + /* + * check for file mappings (i.e. not anonymous) and verify file. + */ + + if ((flags & MAP_ANON) == 0) { + + if (fd < 0 || fd >= fdp->fd_nfiles) + return(EBADF); /* failed range check? */ + fp = fdp->fd_ofiles[fd]; /* convert to file pointer */ + if (fp == NULL) + return(EBADF); + + if (fp->f_type != DTYPE_VNODE) + return (ENODEV); /* only mmap vnodes! 
*/ + vp = (struct vnode *)fp->f_data; /* convert to vnode */ + + if (vp->v_type != VREG && vp->v_type != VCHR && + vp->v_type != VBLK) + return (ENODEV); /* only REG/CHR/BLK support mmap */ + + /* special case: catch SunOS style /dev/zero */ + if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) { + flags |= MAP_ANON; + goto is_anon; + } + + /* + * Old programs may not select a specific sharing type, so + * default to an appropriate one. + * + * XXX: how does MAP_ANON fit in the picture? + */ + if ((flags & (MAP_SHARED|MAP_PRIVATE|MAP_COPY)) == 0) { +#if defined(DEBUG) + printf("WARNING: defaulted mmap() share type to " + "%s (pid %d comm %s)\n", vp->v_type == VCHR ? + "MAP_SHARED" : "MAP_PRIVATE", p->p_pid, + p->p_comm); +#endif + if (vp->v_type == VCHR) + flags |= MAP_SHARED; /* for a device */ + else + flags |= MAP_PRIVATE; /* for a file */ + } + + /* + * MAP_PRIVATE device mappings don't make sense (and aren't + * supported anyway). However, some programs rely on this, + * so just change it to MAP_SHARED. + */ + if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0) { +#if defined(DIAGNOSTIC) + printf("WARNING: converted MAP_PRIVATE device mapping " + "to MAP_SHARED (pid %d comm %s)\n", p->p_pid, + p->p_comm); +#endif + flags = (flags & ~MAP_PRIVATE) | MAP_SHARED; + } + + /* + * now check protection + */ + + maxprot = VM_PROT_EXECUTE; + + /* check read access */ + if (fp->f_flag & FREAD) + maxprot |= VM_PROT_READ; + else if (prot & PROT_READ) + return (EACCES); + + /* check write access, shared case first */ + if (flags & MAP_SHARED) { + /* + * if the file is writable, only add PROT_WRITE to + * maxprot if the file is not immutable, append-only. + * otherwise, if we have asked for PROT_WRITE, return + * EPERM. + */ + if (fp->f_flag & FWRITE) { + if ((error = + VOP_GETATTR(vp, &va, p->p_ucred, p))) + return (error); + if ((va.va_flags & (IMMUTABLE|APPEND)) == 0) + maxprot |= VM_PROT_WRITE; + else if (prot & PROT_WRITE) + return (EPERM); + } + else if (prot & PROT_WRITE) + return (EACCES); + } else { + /* MAP_PRIVATE mappings can always write to */ + maxprot |= VM_PROT_WRITE; + } + + /* + * set handle to vnode + */ + + handle = (caddr_t)vp; + + } else { /* MAP_ANON case */ + + if (fd != -1) + return (EINVAL); + +is_anon: /* label for SunOS style /dev/zero */ + handle = NULL; + maxprot = VM_PROT_ALL; + pos = 0; + } + + /* + * now let kernel internal function uvm_mmap do the work. + */ + + error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot, + flags, handle, pos); + + if (error == 0) + /* remember to add offset */ + *retval = (register_t)(addr + pageoff); + + return (error); +} + +/* + * XXX + * XXX + * XXX + */ +int +sys_omsync(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + return EOPNOTSUPP; +} + +/* + * sys___msync13: the msync system call (a front-end for flush) + */ + +int +sys_msync(p, v, retval) /* ART_UVM_XXX - is this correct msync? 
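Stepping back to the protection handling in sys_mmap() above: the maximum protection of a file mapping is derived from how the descriptor was opened, and for MAP_SHARED also from the immutable/append-only attributes, while MAP_PRIVATE mappings may always gain write access because writes land in anonymous copies. A hedged userland restatement of that decision follows; PROT_* stands in for the kernel's VM_PROT_* values, and the boolean arguments stand in for the FREAD/FWRITE flags and the vattr bits.

#include <errno.h>
#include <sys/mman.h>

/*
 * Illustrative only: compute the widest protection a mapping of an open
 * file may ever be upgraded to.  Returns 0 and fills *maxprot, or an errno.
 */
static int
mmap_maxprot(int prot, int shared, int can_read, int can_write,
    int immutable_or_append, int *maxprot)
{
	int mp = PROT_EXEC;

	if (can_read)
		mp |= PROT_READ;
	else if (prot & PROT_READ)
		return EACCES;			/* fd not open for reading */

	if (shared) {
		if (can_write) {
			if (!immutable_or_append)
				mp |= PROT_WRITE;
			else if (prot & PROT_WRITE)
				return EPERM;	/* immutable or append-only */
		} else if (prot & PROT_WRITE)
			return EACCES;		/* fd not open for writing */
	} else {
		/* MAP_PRIVATE: copy-on-write, so writing is always allowed */
		mp |= PROT_WRITE;
	}

	*maxprot = mp;
	return 0;
}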
*/ + struct proc *p; + void *v; + register_t *retval; +{ + struct sys_msync_args /* { + syscallarg(caddr_t) addr; + syscallarg(size_t) len; + syscallarg(int) flags; + } */ *uap = v; + vaddr_t addr; + vsize_t size, pageoff; + vm_map_t map; + int rv, flags, uvmflags; + + /* + * extract syscall args from the uap + */ + + addr = (vaddr_t)SCARG(uap, addr); + size = (vsize_t)SCARG(uap, len); + flags = SCARG(uap, flags); + + /* sanity check flags */ + if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 || + (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 || + (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC)) + return (EINVAL); + if ((flags & (MS_ASYNC | MS_SYNC)) == 0) + flags |= MS_SYNC; + + /* + * align the address to a page boundary, and adjust the size accordingly + */ + + pageoff = (addr & PAGE_MASK); + addr -= pageoff; + size += pageoff; + size = (vsize_t) round_page(size); + + /* disallow wrap-around. */ + if (addr + size < addr) + return (EINVAL); + + /* + * get map + */ + + map = &p->p_vmspace->vm_map; + + /* + * XXXCDC: do we really need this semantic? + * + * XXX Gak! If size is zero we are supposed to sync "all modified + * pages with the region containing addr". Unfortunately, we + * don't really keep track of individual mmaps so we approximate + * by flushing the range of the map entry containing addr. + * This can be incorrect if the region splits or is coalesced + * with a neighbor. + */ + if (size == 0) { + vm_map_entry_t entry; + + vm_map_lock_read(map); + rv = uvm_map_lookup_entry(map, addr, &entry); + if (rv == TRUE) { + addr = entry->start; + size = entry->end - entry->start; + } + vm_map_unlock_read(map); + if (rv == FALSE) + return (EINVAL); + } + + /* + * translate MS_ flags into PGO_ flags + */ + uvmflags = (flags & MS_INVALIDATE) ? PGO_FREE : 0; + if (flags & MS_SYNC) + uvmflags |= PGO_SYNCIO; + else + uvmflags |= PGO_SYNCIO; /* XXXCDC: force sync for now! */ + + /* + * doit! + */ + rv = uvm_map_clean(map, addr, addr+size, uvmflags); + + /* + * and return... + */ + switch (rv) { + case KERN_SUCCESS: + return(0); + case KERN_INVALID_ADDRESS: + return (ENOMEM); + case KERN_FAILURE: + return (EIO); + case KERN_PAGES_LOCKED: /* XXXCDC: uvm doesn't return this */ + return (EBUSY); + default: + return (EINVAL); + } + /*NOTREACHED*/ +} + +/* + * sys_munmap: unmap a users memory + */ + +int +sys_munmap(p, v, retval) + register struct proc *p; + void *v; + register_t *retval; +{ + register struct sys_munmap_args /* { + syscallarg(caddr_t) addr; + syscallarg(size_t) len; + } */ *uap = v; + vaddr_t addr; + vsize_t size, pageoff; + vm_map_t map; + vaddr_t vm_min_address = VM_MIN_ADDRESS; + struct vm_map_entry *dead_entries; + + /* + * get syscall args... + */ + + addr = (vaddr_t) SCARG(uap, addr); + size = (vsize_t) SCARG(uap, len); + + /* + * align the address to a page boundary, and adjust the size accordingly + */ + + pageoff = (addr & PAGE_MASK); + addr -= pageoff; + size += pageoff; + size = (vsize_t) round_page(size); + + if ((int)size < 0) + return (EINVAL); + if (size == 0) + return (0); + + /* + * Check for illegal addresses. Watch out for address wrap... + * Note that VM_*_ADDRESS are not constants due to casts (argh). 
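The same preamble recurs in msync, munmap, mprotect, minherit, mlock and munlock: strip the page offset from the address, fold it into the length, round the length up to whole pages, and reject ranges that wrap or leave the user portion of the address space. A compact sketch of that idiom; PAGE_SIZE and the upper bound are illustrative constants rather than the machine-dependent PAGE_MASK and VM_MAXUSER_ADDRESS, and the low-address check is omitted.

#include <errno.h>

#define PAGE_SIZE	4096UL		/* illustrative */
#define PAGE_MASK	(PAGE_SIZE - 1)
#define MAX_USER_ADDR	0xbfff0000UL	/* stand-in for VM_MAXUSER_ADDRESS */

#define round_page(x)	(((x) + PAGE_MASK) & ~PAGE_MASK)

static int
align_user_range(unsigned long addr, unsigned long len,
    unsigned long *start, unsigned long *size)
{
	unsigned long pageoff = addr & PAGE_MASK;

	addr -= pageoff;			/* back up to a page boundary */
	len = round_page(len + pageoff);	/* cover the spilled bytes too */

	if (addr + len < addr)			/* wrap-around */
		return EINVAL;
	if (addr + len > MAX_USER_ADDR)		/* past the user VA range */
		return EINVAL;

	*start = addr;
	*size = len;
	return 0;
}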
+ */ + if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS) + return (EINVAL); + if (vm_min_address > 0 && addr < vm_min_address) + return (EINVAL); + if (addr > addr + size) + return (EINVAL); + map = &p->p_vmspace->vm_map; + + + vm_map_lock(map); /* lock map so we can checkprot */ + + /* + * interesting system call semantic: make sure entire range is + * allocated before allowing an unmap. + */ + + if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) { + vm_map_unlock(map); + return (EINVAL); + } + + /* + * doit! + */ + (void) uvm_unmap_remove(map, addr, addr + size, &dead_entries); + + vm_map_unlock(map); /* and unlock */ + + if (dead_entries != NULL) + uvm_unmap_detach(dead_entries, 0); + + return (0); +} + +/* + * sys_mprotect: the mprotect system call + */ + +int +sys_mprotect(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + struct sys_mprotect_args /* { + syscallarg(caddr_t) addr; + syscallarg(int) len; + syscallarg(int) prot; + } */ *uap = v; + vaddr_t addr; + vsize_t size, pageoff; + vm_prot_t prot; + int rv; + + /* + * extract syscall args from uap + */ + + addr = (vaddr_t)SCARG(uap, addr); + size = (vsize_t)SCARG(uap, len); + prot = SCARG(uap, prot) & VM_PROT_ALL; + + /* + * align the address to a page boundary, and adjust the size accordingly + */ + pageoff = (addr & PAGE_MASK); + addr -= pageoff; + size += pageoff; + size = (vsize_t) round_page(size); + if ((int)size < 0) + return (EINVAL); + + /* + * doit + */ + + rv = uvm_map_protect(&p->p_vmspace->vm_map, + addr, addr+size, prot, FALSE); + + if (rv == KERN_SUCCESS) + return (0); + if (rv == KERN_PROTECTION_FAILURE) + return (EACCES); + return (EINVAL); +} + +/* + * sys_minherit: the minherit system call + */ + +int +sys_minherit(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + struct sys_minherit_args /* { + syscallarg(caddr_t) addr; + syscallarg(int) len; + syscallarg(int) inherit; + } */ *uap = v; + vaddr_t addr; + vsize_t size, pageoff; + register vm_inherit_t inherit; + + addr = (vaddr_t)SCARG(uap, addr); + size = (vsize_t)SCARG(uap, len); + inherit = SCARG(uap, inherit); + /* + * align the address to a page boundary, and adjust the size accordingly + */ + + pageoff = (addr & PAGE_MASK); + addr -= pageoff; + size += pageoff; + size = (vsize_t) round_page(size); + + if ((int)size < 0) + return (EINVAL); + + switch (uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size, + inherit)) { + case KERN_SUCCESS: + return (0); + case KERN_PROTECTION_FAILURE: + return (EACCES); + } + return (EINVAL); +} + +/* + * sys_mlock: memory lock + */ + +int +sys_mlock(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + struct sys_mlock_args /* { + syscallarg(const void *) addr; + syscallarg(size_t) len; + } */ *uap = v; + vaddr_t addr; + vsize_t size, pageoff; + int error; + + /* + * extract syscall args from uap + */ + addr = (vaddr_t)SCARG(uap, addr); + size = (vsize_t)SCARG(uap, len); + + /* + * align the address to a page boundary and adjust the size accordingly + */ + pageoff = (addr & PAGE_MASK); + addr -= pageoff; + size += pageoff; + size = (vsize_t) round_page(size); + + /* disallow wrap-around. 
*/ + if (addr + (int)size < addr) + return (EINVAL); + + if (atop(size) + uvmexp.wired > uvmexp.wiredmax) + return (EAGAIN); + +#ifdef pmap_wired_count + if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) > + p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur) + return (EAGAIN); +#else + if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) + return (error); +#endif + + error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE); + return (error == KERN_SUCCESS ? 0 : ENOMEM); +} + +/* + * sys_munlock: unlock wired pages + */ + +int +sys_munlock(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + struct sys_munlock_args /* { + syscallarg(const void *) addr; + syscallarg(size_t) len; + } */ *uap = v; + vaddr_t addr; + vsize_t size, pageoff; + int error; + + /* + * extract syscall args from uap + */ + + addr = (vaddr_t)SCARG(uap, addr); + size = (vsize_t)SCARG(uap, len); + + /* + * align the address to a page boundary, and adjust the size accordingly + */ + pageoff = (addr & PAGE_MASK); + addr -= pageoff; + size += pageoff; + size = (vsize_t) round_page(size); + + /* disallow wrap-around. */ + if (addr + (int)size < addr) + return (EINVAL); + +#ifndef pmap_wired_count + if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) + return (error); +#endif + + error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE); + return (error == KERN_SUCCESS ? 0 : ENOMEM); +} + +/* + * uvm_mmap: internal version of mmap + * + * - used by sys_mmap, exec, and sysv shm + * - handle is a vnode pointer or NULL for MAP_ANON (XXX: not true, + * sysv shm uses "named anonymous memory") + * - caller must page-align the file offset + */ + +int +uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff) + vm_map_t map; + vaddr_t *addr; + vsize_t size; + vm_prot_t prot, maxprot; + int flags; + caddr_t handle; /* XXX: VNODE? */ + vaddr_t foff; +{ + struct uvm_object *uobj; + struct vnode *vp; + int retval; + int advice = UVM_ADV_NORMAL; + uvm_flag_t uvmflag = 0; + + /* + * check params + */ + + if (size == 0) + return(0); + if (foff & PAGE_MASK) + return(EINVAL); + if ((prot & maxprot) != prot) + return(EINVAL); + + /* + * for non-fixed mappings, round off the suggested address. + * for fixed mappings, check alignment and zap old mappings. + */ + + if ((flags & MAP_FIXED) == 0) { + *addr = round_page(*addr); /* round */ + } else { + + if (*addr & PAGE_MASK) + return(EINVAL); + uvmflag |= UVM_FLAG_FIXED; + (void) uvm_unmap(map, *addr, *addr + size); /* zap! */ + } + + /* + * handle anon vs. non-anon mappings. for non-anon mappings attach + * to underlying vm object. + */ + + if (flags & MAP_ANON) { + + foff = UVM_UNKNOWN_OFFSET; + uobj = NULL; + if ((flags & MAP_SHARED) == 0) + /* XXX: defer amap create */ + uvmflag |= UVM_FLAG_COPYONW; + else + /* shared: create amap now */ + uvmflag |= UVM_FLAG_OVERLAY; + + } else { + + vp = (struct vnode *) handle; /* get vnode */ + if (vp->v_type != VCHR) { + uobj = uvn_attach((void *) vp, (flags & MAP_SHARED) ? + maxprot : (maxprot & ~VM_PROT_WRITE)); + + /* + * XXXCDC: hack from old code + * don't allow vnodes which have been mapped + * shared-writeable to persist [forces them to be + * flushed out when last reference goes]. + * XXXCDC: interesting side effect: avoids a bug. + * note that in WRITE [ufs_readwrite.c] that we + * allocate buffer, uncache, and then do the write. + * the problem with this is that if the uncache causes + * VM data to be flushed to the same area of the file + * we are writing to... 
in that case we've got the + * buffer locked and our process goes to sleep forever. + * + * XXXCDC: checking maxprot protects us from the + * "persistbug" program but this is not a long term + * solution. + * + * XXXCDC: we don't bother calling uncache with the vp + * VOP_LOCKed since we know that we are already + * holding a valid reference to the uvn (from the + * uvn_attach above), and thus it is impossible for + * the uncache to kill the uvn and trigger I/O. + */ + if (flags & MAP_SHARED) { + if ((prot & VM_PROT_WRITE) || + (maxprot & VM_PROT_WRITE)) { + uvm_vnp_uncache(vp); + } + } + + } else { + uobj = udv_attach((void *) &vp->v_rdev, + (flags & MAP_SHARED) ? + maxprot : (maxprot & ~VM_PROT_WRITE)); + advice = UVM_ADV_RANDOM; + } + + if (uobj == NULL) + return((vp->v_type == VREG) ? ENOMEM : EINVAL); + + if ((flags & MAP_SHARED) == 0) + uvmflag |= UVM_FLAG_COPYONW; + } + + /* + * set up mapping flags + */ + + uvmflag = UVM_MAPFLAG(prot, maxprot, + (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY, + advice, uvmflag); + + /* + * do it! + */ + + retval = uvm_map(map, addr, size, uobj, foff, uvmflag); + + if (retval == KERN_SUCCESS) + return(0); + + /* + * errors: first detach from the uobj, if any. + */ + + if (uobj) + uobj->pgops->pgo_detach(uobj); + + switch (retval) { + case KERN_INVALID_ADDRESS: + case KERN_NO_SPACE: + return(ENOMEM); + case KERN_PROTECTION_FAILURE: + return(EACCES); + } + return(EINVAL); +} diff --git a/sys/uvm/uvm_object.h b/sys/uvm/uvm_object.h new file mode 100644 index 00000000000..10e00d1535a --- /dev/null +++ b/sys/uvm/uvm_object.h @@ -0,0 +1,74 @@ +/* $NetBSD: uvm_object.h,v 1.5 1998/03/09 00:58:58 mrg Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
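To close out uvm_mmap() above: the caller's BSD flags are folded into UVM's own mapping flags, with anonymous shared memory getting its amap immediately (UVM_FLAG_OVERLAY), private mappings of either kind deferring the copy until a write (UVM_FLAG_COPYONW), and character-device objects mapped with random-access advice. The sketch below tabulates that translation; the flag values and the advice enum are symbolic stand-ins, not the real UVM encodings.

#include <sys/mman.h>

/* symbolic stand-ins for the UVM flag bits used by uvm_mmap() above */
#define UVM_FLAG_COPYONW	0x1	/* copy-on-write; amap created lazily */
#define UVM_FLAG_OVERLAY	0x2	/* create the amap up front */
#define UVM_FLAG_FIXED		0x4	/* caller chose the address */

enum advice { ADV_NORMAL, ADV_RANDOM };

struct mapping_plan {
	int		uvmflags;
	enum advice	advice;
	int		anonymous;	/* no backing uvm_object */
};

static struct mapping_plan
plan_mapping(int flags, int is_char_device)
{
	struct mapping_plan p = { 0, ADV_NORMAL, 0 };

	if (flags & MAP_FIXED)
		p.uvmflags |= UVM_FLAG_FIXED;

	if (flags & MAP_ANON) {
		p.anonymous = 1;
		if (flags & MAP_SHARED)
			p.uvmflags |= UVM_FLAG_OVERLAY;	/* shared: amap now */
		else
			p.uvmflags |= UVM_FLAG_COPYONW;	/* private: defer */
	} else {
		if (is_char_device)
			p.advice = ADV_RANDOM;		/* udv_attach() path */
		if ((flags & MAP_SHARED) == 0)
			p.uvmflags |= UVM_FLAG_COPYONW;
	}
	return p;
}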
+ * + * from: Id: uvm_object.h,v 1.1.2.2 1998/01/04 22:44:51 chuck Exp + */ + +#ifndef _UVM_UVM_OBJECT_H_ +#define _UVM_UVM_OBJECT_H_ + +/* + * uvm_object.h + */ + +/* + * uvm_object: all that is left of mach objects. + */ + +struct uvm_object { + simple_lock_data_t vmobjlock; /* lock on memq */ + struct uvm_pagerops *pgops; /* pager ops */ + struct pglist memq; /* pages in this object */ + int uo_npages; /* # of pages in memq */ + int uo_refs; /* reference count */ +}; + +/* + * UVM_OBJ_KERN is a 'special' uo_refs value which indicates that the + * object is a kernel memory object rather than a normal one (kernel + * memory objects don't have reference counts -- they never die). + * + * this value is used to detected kernel object mappings at uvm_unmap() + * time. normally when an object is unmapped its pages eventaully become + * deactivated and then paged out and/or freed. this is not useful + * for kernel objects... when a kernel object is unmapped we always want + * to free the resources associated with the mapping. UVM_OBJ_KERN + * allows us to decide which type of unmapping we want to do. + */ +#define UVM_OBJ_KERN (-2) + +#endif /* _UVM_UVM_OBJECT_H_ */ diff --git a/sys/uvm/uvm_page.c b/sys/uvm/uvm_page.c new file mode 100644 index 00000000000..15ad5ce99aa --- /dev/null +++ b/sys/uvm/uvm_page.c @@ -0,0 +1,1122 @@ +/* $NetBSD: uvm_page.c,v 1.15 1998/10/18 23:50:00 chs Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * Copyright (c) 1991, 1993, The Regents of the University of California. + * + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor, + * Washington University, the University of California, Berkeley and + * its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
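The UVM_OBJ_KERN comment above is the whole trick: kernel objects are not reference counted, so a sentinel value in uo_refs both marks them and guarantees they are never torn down, and uvm_unmap() can key off it to release mapping resources eagerly. A minimal sketch of dropping a reference against such a sentinel; the detach hook here is a placeholder, not the real pgo_detach pager operation.

#define UVM_OBJ_KERN	(-2)	/* sentinel: kernel object, never dies */

struct object {
	int	uo_refs;			/* count, or UVM_OBJ_KERN */
	void	(*detach)(struct object *);	/* placeholder teardown hook */
};

static void
object_release(struct object *obj)
{
	if (obj->uo_refs == UVM_OBJ_KERN)
		return;			/* kernel objects are not counted */
	if (--obj->uo_refs == 0)
		obj->detach(obj);	/* last reference: tear it down */
}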
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)vm_page.c 8.3 (Berkeley) 3/21/94 + * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp + * + * + * Copyright (c) 1987, 1990 Carnegie-Mellon University. + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and + * its documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie the + * rights to redistribute these changes. + */ + +/* + * uvm_page.c: page ops. + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/malloc.h> +#include <sys/proc.h> + +#include <vm/vm.h> +#include <vm/vm_page.h> +#include <vm/vm_kern.h> + +#define UVM_PAGE /* pull in uvm_page.h functions */ +#include <uvm/uvm.h> + +/* + * global vars... XXXCDC: move to uvm. structure. + */ + +/* + * physical memory config is stored in vm_physmem. + */ + +struct vm_physseg vm_physmem[VM_PHYSSEG_MAX]; /* XXXCDC: uvm.physmem */ +int vm_nphysseg = 0; /* XXXCDC: uvm.nphysseg */ + +/* + * local variables + */ + +/* + * these variables record the values returned by vm_page_bootstrap, + * for debugging purposes. The implementation of uvm_pageboot_alloc + * and pmap_startup here also uses them internally. + */ + +static vaddr_t virtual_space_start; +static vaddr_t virtual_space_end; + +/* + * we use a hash table with only one bucket during bootup. we will + * later rehash (resize) the hash table once malloc() is ready. + * we static allocate the bootstrap bucket below... 
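The bootstrap arrangement described above is worth seeing concretely: the <object, offset> hash starts life as one statically allocated bucket with a zero mask, and because the bucket count is always a power of two the bucket is selected by masking the hash value. The sketch below mirrors that shape; the mixing in hash_bucket() is invented for illustration, since uvm_pagehash() itself is not part of this hunk.

#include <sys/queue.h>

struct page {
	TAILQ_ENTRY(page) hashq;	/* linkage within one bucket */
	void		*object;	/* owning object */
	unsigned long	offset;		/* offset within that object */
};
TAILQ_HEAD(bucket, page);

static struct bucket bootbucket = TAILQ_HEAD_INITIALIZER(bootbucket);
static struct bucket *page_hash = &bootbucket;	/* one bucket at boot */
static unsigned long page_hashmask;		/* nbuckets - 1, so 0 here */

/* invented mixing; the real uvm_pagehash() lives elsewhere */
static struct bucket *
hash_bucket(void *object, unsigned long offset)
{
	unsigned long h = (unsigned long)object ^ (offset >> 12);

	return &page_hash[h & page_hashmask];
}

static void
hash_insert(struct page *pg)
{
	TAILQ_INSERT_TAIL(hash_bucket(pg->object, pg->offset), pg, hashq);
}

static void
hash_remove(struct page *pg)
{
	TAILQ_REMOVE(hash_bucket(pg->object, pg->offset), pg, hashq);
}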
+ */ + +static struct pglist uvm_bootbucket; + +/* + * local prototypes + */ + +static void uvm_pageinsert __P((struct vm_page *)); + + +/* + * inline functions + */ + +/* + * uvm_pageinsert: insert a page in the object and the hash table + * + * => caller must lock object + * => caller must lock page queues + * => call should have already set pg's object and offset pointers + * and bumped the version counter + */ + +__inline static void +uvm_pageinsert(pg) + struct vm_page *pg; +{ + struct pglist *buck; + int s; + +#ifdef DIAGNOSTIC + if (pg->flags & PG_TABLED) + panic("uvm_pageinsert: already inserted"); +#endif + + buck = &uvm.page_hash[uvm_pagehash(pg->uobject,pg->offset)]; + s = splimp(); + simple_lock(&uvm.hashlock); + TAILQ_INSERT_TAIL(buck, pg, hashq); /* put in hash */ + simple_unlock(&uvm.hashlock); + splx(s); + + TAILQ_INSERT_TAIL(&pg->uobject->memq, pg, listq); /* put in object */ + pg->flags |= PG_TABLED; + pg->uobject->uo_npages++; + +} + +/* + * uvm_page_remove: remove page from object and hash + * + * => caller must lock object + * => caller must lock page queues + */ + +void __inline +uvm_pageremove(pg) + struct vm_page *pg; +{ + struct pglist *buck; + int s; + +#ifdef DIAGNOSTIC + if ((pg->flags & (PG_FAULTING)) != 0) + panic("uvm_pageremove: page is faulting"); +#endif + + if ((pg->flags & PG_TABLED) == 0) + return; /* XXX: log */ + + buck = &uvm.page_hash[uvm_pagehash(pg->uobject,pg->offset)]; + s = splimp(); + simple_lock(&uvm.hashlock); + TAILQ_REMOVE(buck, pg, hashq); + simple_unlock(&uvm.hashlock); + splx(s); + + /* object should be locked */ + TAILQ_REMOVE(&pg->uobject->memq, pg, listq); + + pg->flags &= ~PG_TABLED; + pg->uobject->uo_npages--; + pg->uobject = NULL; + pg->version++; + +} + +/* + * uvm_page_init: init the page system. called from uvm_init(). + * + * => we return the range of kernel virtual memory in kvm_startp/kvm_endp + */ + +void +uvm_page_init(kvm_startp, kvm_endp) + vaddr_t *kvm_startp, *kvm_endp; +{ + int freepages, pagecount; + vm_page_t pagearray; + int lcv, n, i; + paddr_t paddr; + + + /* + * step 1: init the page queues and page queue locks + */ + for (lcv = 0; lcv < VM_NFREELIST; lcv++) + TAILQ_INIT(&uvm.page_free[lcv]); + TAILQ_INIT(&uvm.page_active); + TAILQ_INIT(&uvm.page_inactive_swp); + TAILQ_INIT(&uvm.page_inactive_obj); + simple_lock_init(&uvm.pageqlock); + simple_lock_init(&uvm.fpageqlock); + + /* + * step 2: init the <obj,offset> => <page> hash table. for now + * we just have one bucket (the bootstrap bucket). later on we + * will malloc() new buckets as we dynamically resize the hash table. + */ + + uvm.page_nhash = 1; /* 1 bucket */ + uvm.page_hashmask = 0; /* mask for hash function */ + uvm.page_hash = &uvm_bootbucket; /* install bootstrap bucket */ + TAILQ_INIT(uvm.page_hash); /* init hash table */ + simple_lock_init(&uvm.hashlock); /* init hash table lock */ + + /* + * step 3: allocate vm_page structures. + */ + + /* + * sanity check: + * before calling this function the MD code is expected to register + * some free RAM with the uvm_page_physload() function. our job + * now is to allocate vm_page structures for this memory. + */ + + if (vm_nphysseg == 0) + panic("vm_page_bootstrap: no memory pre-allocated"); + + /* + * first calculate the number of free pages... + * + * note that we use start/end rather than avail_start/avail_end. + * this allows us to allocate extra vm_page structures in case we + * want to return some memory to the pool after booting. 
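The computation that follows this comment splits the registered RAM between the vm_page array itself and the pages that array will describe, since the array is carved out of the same memory: pagecount = ((freepages + 1) << PAGE_SHIFT) / (PAGE_SIZE + sizeof(struct vm_page)) charges each usable page one PAGE_SIZE plus one structure. A standalone check of that arithmetic, with illustrative page and structure sizes:

#include <stdio.h>

#define PAGE_SHIFT	12		/* illustrative: 4 KB pages */
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

struct vm_page_stub { char pad[96]; };	/* stand-in for struct vm_page */

int
main(void)
{
	unsigned long freepages = 32768;	/* 128 MB of registered RAM */
	unsigned long pagecount;

	/* the "+ 1" is the fudge against truncation noted in the source */
	pagecount = ((freepages + 1) << PAGE_SHIFT) /
	    (PAGE_SIZE + sizeof(struct vm_page_stub));

	printf("%lu raw pages -> %lu usable pages, %lu bytes of vm_page[]\n",
	    freepages, pagecount,
	    pagecount * sizeof(struct vm_page_stub));
	return 0;
}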
+ */ + + freepages = 0; + for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) + freepages += (vm_physmem[lcv].end - vm_physmem[lcv].start); + + /* + * we now know we have (PAGE_SIZE * freepages) bytes of memory we can + * use. for each page of memory we use we need a vm_page structure. + * thus, the total number of pages we can use is the total size of + * the memory divided by the PAGE_SIZE plus the size of the vm_page + * structure. we add one to freepages as a fudge factor to avoid + * truncation errors (since we can only allocate in terms of whole + * pages). + */ + + pagecount = ((freepages + 1) << PAGE_SHIFT) / + (PAGE_SIZE + sizeof(struct vm_page)); + pagearray = (vm_page_t)uvm_pageboot_alloc(pagecount * + sizeof(struct vm_page)); + bzero(pagearray, pagecount * sizeof(struct vm_page)); + + /* + * step 4: init the vm_page structures and put them in the correct + * place... + */ + + for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) { + + n = vm_physmem[lcv].end - vm_physmem[lcv].start; + if (n > pagecount) { + printf("uvm_page_init: lost %d page(s) in init\n", + n - pagecount); + panic("uvm_page_init"); /* XXXCDC: shouldn't happen? */ + /* n = pagecount; */ + } + /* set up page array pointers */ + vm_physmem[lcv].pgs = pagearray; + pagearray += n; + pagecount -= n; + vm_physmem[lcv].lastpg = vm_physmem[lcv].pgs + (n - 1); + + /* init and free vm_pages (we've already zeroed them) */ + paddr = ptoa(vm_physmem[lcv].start); + for (i = 0 ; i < n ; i++, paddr += PAGE_SIZE) { + vm_physmem[lcv].pgs[i].phys_addr = paddr; + if (atop(paddr) >= vm_physmem[lcv].avail_start && + atop(paddr) <= vm_physmem[lcv].avail_end) { + uvmexp.npages++; + /* add page to free pool */ + uvm_pagefree(&vm_physmem[lcv].pgs[i]); + } + } + } + /* + * step 5: pass up the values of virtual_space_start and + * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper + * layers of the VM. + */ + + *kvm_startp = round_page(virtual_space_start); + *kvm_endp = trunc_page(virtual_space_end); + + /* + * step 6: init pagedaemon lock + */ + + simple_lock_init(&uvm.pagedaemon_lock); + + /* + * step 7: init reserve thresholds + * XXXCDC - values may need adjusting + */ + uvmexp.reserve_pagedaemon = 1; + uvmexp.reserve_kernel = 5; + + /* + * done! + */ + +} + +/* + * uvm_setpagesize: set the page size + * + * => sets page_shift and page_mask from uvmexp.pagesize. + * => XXXCDC: move global vars. + */ + +void +uvm_setpagesize() +{ + if (uvmexp.pagesize == 0) + uvmexp.pagesize = DEFAULT_PAGE_SIZE; + uvmexp.pagemask = uvmexp.pagesize - 1; + if ((uvmexp.pagemask & uvmexp.pagesize) != 0) + panic("uvm_setpagesize: page size not a power of two"); + for (uvmexp.pageshift = 0; ; uvmexp.pageshift++) + if ((1 << uvmexp.pageshift) == uvmexp.pagesize) + break; +} + +/* + * uvm_pageboot_alloc: steal memory from physmem for bootstrapping + */ + +vaddr_t +uvm_pageboot_alloc(size) + vsize_t size; +{ +#if defined(PMAP_STEAL_MEMORY) + vaddr_t addr; + + /* + * defer bootstrap allocation to MD code (it may want to allocate + * from a direct-mapped segment). pmap_steal_memory should round + * off virtual_space_start/virtual_space_end. + */ + + addr = pmap_steal_memory(size, &virtual_space_start, + &virtual_space_end); + + return(addr); + +#else /* !PMAP_STEAL_MEMORY */ + + vaddr_t addr, vaddr; + paddr_t paddr; + + /* round to page size */ + size = round_page(size); + + /* + * on first call to this function init ourselves. we detect this + * by checking virtual_space_start/end which are in the zero'd BSS area. 
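Without PMAP_STEAL_MEMORY, the rest of uvm_pageboot_alloc() below is a plain bump allocator over the kernel virtual range reported by pmap_virtual_space(): the first call notices that both cursors are still zero (courtesy of the BSS), fetches and page-aligns the range, and every call then returns the current cursor, advances it by the rounded size, and backs the new pages with physical memory. The sketch below keeps only the cursor logic; the virtual range is made up and the physical backing step is reduced to a comment.

#include <stdio.h>

#define PAGE_SIZE	4096UL		/* illustrative */
#define PAGE_MASK	(PAGE_SIZE - 1)
#define round_page(x)	(((x) + PAGE_MASK) & ~PAGE_MASK)
#define trunc_page(x)	((x) & ~PAGE_MASK)

static unsigned long virtual_space_start;	/* zeroed "BSS", as in the code */
static unsigned long virtual_space_end;

static unsigned long
pageboot_alloc(unsigned long size)
{
	unsigned long addr;

	/* first call: both cursors still zero, so fetch the usable VA range */
	if (virtual_space_start == virtual_space_end) {
		virtual_space_start = round_page(0xc0100000UL);	/* made up */
		virtual_space_end = trunc_page(0xc8000000UL);	/* made up */
	}

	size = round_page(size);
	addr = virtual_space_start;
	virtual_space_start += size;

	/*
	 * The real routine would now pull physical pages off vm_physmem
	 * with uvm_page_physget() and map them at addr.
	 */
	return addr;
}

int
main(void)
{
	printf("first:  0x%lx\n", pageboot_alloc(10000));
	printf("second: 0x%lx\n", pageboot_alloc(1));
	return 0;
}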
+ */ + + if (virtual_space_start == virtual_space_end) { + pmap_virtual_space(&virtual_space_start, &virtual_space_end); + + /* round it the way we like it */ + virtual_space_start = round_page(virtual_space_start); + virtual_space_end = trunc_page(virtual_space_end); + } + + /* + * allocate virtual memory for this request + */ + + addr = virtual_space_start; + virtual_space_start += size; + + /* + * allocate and mapin physical pages to back new virtual pages + */ + + for (vaddr = round_page(addr) ; vaddr < addr + size ; + vaddr += PAGE_SIZE) { + + if (!uvm_page_physget(&paddr)) + panic("uvm_pageboot_alloc: out of memory"); + + /* XXX: should be wired, but some pmaps don't like that ... */ +#if defined(PMAP_NEW) + pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE); +#else + pmap_enter(pmap_kernel(), vaddr, paddr, + VM_PROT_READ|VM_PROT_WRITE, FALSE); +#endif + + } + return(addr); +#endif /* PMAP_STEAL_MEMORY */ +} + +#if !defined(PMAP_STEAL_MEMORY) +/* + * uvm_page_physget: "steal" one page from the vm_physmem structure. + * + * => attempt to allocate it off the end of a segment in which the "avail" + * values match the start/end values. if we can't do that, then we + * will advance both values (making them equal, and removing some + * vm_page structures from the non-avail area). + * => return false if out of memory. + */ + +boolean_t +uvm_page_physget(paddrp) + paddr_t *paddrp; +{ + int lcv, x; + + /* pass 1: try allocating from a matching end */ +#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) + for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--) +#else + for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) +#endif + { + + if (vm_physmem[lcv].pgs) + panic("vm_page_physget: called _after_ bootstrap"); + + /* try from front */ + if (vm_physmem[lcv].avail_start == vm_physmem[lcv].start && + vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) { + *paddrp = ptoa(vm_physmem[lcv].avail_start); + vm_physmem[lcv].avail_start++; + vm_physmem[lcv].start++; + /* nothing left? nuke it */ + if (vm_physmem[lcv].avail_start == + vm_physmem[lcv].end) { + if (vm_nphysseg == 1) + panic("vm_page_physget: out of memory!"); + vm_nphysseg--; + for (x = lcv ; x < vm_nphysseg ; x++) + /* structure copy */ + vm_physmem[x] = vm_physmem[x+1]; + } + return (TRUE); + } + + /* try from rear */ + if (vm_physmem[lcv].avail_end == vm_physmem[lcv].end && + vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) { + *paddrp = ptoa(vm_physmem[lcv].avail_end - 1); + vm_physmem[lcv].avail_end--; + vm_physmem[lcv].end--; + /* nothing left? nuke it */ + if (vm_physmem[lcv].avail_end == + vm_physmem[lcv].start) { + if (vm_nphysseg == 1) + panic("vm_page_physget: out of memory!"); + vm_nphysseg--; + for (x = lcv ; x < vm_nphysseg ; x++) + /* structure copy */ + vm_physmem[x] = vm_physmem[x+1]; + } + return (TRUE); + } + } + + /* pass2: forget about matching ends, just allocate something */ +#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) + for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--) +#else + for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) +#endif + { + + /* any room in this bank? */ + if (vm_physmem[lcv].avail_start >= vm_physmem[lcv].avail_end) + continue; /* nope */ + + *paddrp = ptoa(vm_physmem[lcv].avail_start); + vm_physmem[lcv].avail_start++; + /* truncate! */ + vm_physmem[lcv].start = vm_physmem[lcv].avail_start; + + /* nothing left? 
nuke it */ + if (vm_physmem[lcv].avail_start == vm_physmem[lcv].end) { + if (vm_nphysseg == 1) + panic("vm_page_physget: out of memory!"); + vm_nphysseg--; + for (x = lcv ; x < vm_nphysseg ; x++) + /* structure copy */ + vm_physmem[x] = vm_physmem[x+1]; + } + return (TRUE); + } + + return (FALSE); /* whoops! */ +} +#endif /* PMAP_STEAL_MEMORY */ + +/* + * uvm_page_physload: load physical memory into VM system + * + * => all args are PFs + * => all pages in start/end get vm_page structures + * => areas marked by avail_start/avail_end get added to the free page pool + * => we are limited to VM_PHYSSEG_MAX physical memory segments + */ + +void +uvm_page_physload(start, end, avail_start, avail_end, free_list) + vaddr_t start, end, avail_start, avail_end; + int free_list; +{ + int preload, lcv; + psize_t npages; + struct vm_page *pgs; + struct vm_physseg *ps; + + if (uvmexp.pagesize == 0) + panic("vm_page_physload: page size not set!"); + + if (free_list >= VM_NFREELIST || free_list < VM_FREELIST_DEFAULT) + panic("uvm_page_physload: bad free list %d\n", free_list); + + /* + * do we have room? + */ + if (vm_nphysseg == VM_PHYSSEG_MAX) { + printf("vm_page_physload: unable to load physical memory " + "segment\n"); + printf("\t%d segments allocated, ignoring 0x%lx -> 0x%lx\n", + VM_PHYSSEG_MAX, start, end); + return; + } + + /* + * check to see if this is a "preload" (i.e. uvm_mem_init hasn't been + * called yet, so malloc is not available). + */ + for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) { + if (vm_physmem[lcv].pgs) + break; + } + preload = (lcv == vm_nphysseg); + + /* + * if VM is already running, attempt to malloc() vm_page structures + */ + if (!preload) { +#if defined(VM_PHYSSEG_NOADD) + panic("vm_page_physload: tried to add RAM after vm_mem_init"); +#else + /* XXXCDC: need some sort of lockout for this case */ + paddr_t paddr; + npages = end - start; /* # of pages */ + MALLOC(pgs, struct vm_page *, sizeof(struct vm_page) * npages, + M_VMPAGE, M_NOWAIT); + if (pgs == NULL) { + printf("vm_page_physload: can not malloc vm_page " + "structs for segment\n"); + printf("\tignoring 0x%lx -> 0x%lx\n", start, end); + return; + } + /* zero data, init phys_addr and free_list, and free pages */ + bzero(pgs, sizeof(struct vm_page) * npages); + for (lcv = 0, paddr = ptoa(start) ; + lcv < npages ; lcv++, paddr += PAGE_SIZE) { + pgs[lcv].phys_addr = paddr; + pgs[lcv].free_list = free_list; + if (atop(paddr) >= avail_start && + atop(paddr) <= avail_end) + uvm_pagefree(&pgs[lcv]); + } + /* XXXCDC: incomplete: need to update uvmexp.free, what else? */ + /* XXXCDC: need hook to tell pmap to rebuild pv_list, etc... */ +#endif + } else { + + /* gcc complains if these don't get init'd */ + pgs = NULL; + npages = 0; + + } + + /* + * now insert us in the proper place in vm_physmem[] + */ + +#if (VM_PHYSSEG_STRAT == VM_PSTRAT_RANDOM) + + /* random: put it at the end (easy!) */ + ps = &vm_physmem[vm_nphysseg]; + +#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH) + + { + int x; + /* sort by address for binary search */ + for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) + if (start < vm_physmem[lcv].start) + break; + ps = &vm_physmem[lcv]; + /* move back other entries, if necessary ... 
*/ + for (x = vm_nphysseg ; x > lcv ; x--) + /* structure copy */ + vm_physmem[x] = vm_physmem[x - 1]; + } + +#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST) + + { + int x; + /* sort by largest segment first */ + for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) + if ((end - start) > + (vm_physmem[lcv].end - vm_physmem[lcv].start)) + break; + ps = &vm_physmem[lcv]; + /* move back other entries, if necessary ... */ + for (x = vm_nphysseg ; x > lcv ; x--) + /* structure copy */ + vm_physmem[x] = vm_physmem[x - 1]; + } + +#else + + panic("vm_page_physload: unknown physseg strategy selected!"); + +#endif + + ps->start = start; + ps->end = end; + ps->avail_start = avail_start; + ps->avail_end = avail_end; + if (preload) { + ps->pgs = NULL; + } else { + ps->pgs = pgs; + ps->lastpg = pgs + npages - 1; + } + ps->free_list = free_list; + vm_nphysseg++; + + /* + * done! + */ + + if (!preload) + uvm_page_rehash(); + + return; +} + +/* + * uvm_page_rehash: reallocate hash table based on number of free pages. + */ + +void +uvm_page_rehash() +{ + int freepages, lcv, bucketcount, s, oldcount; + struct pglist *newbuckets, *oldbuckets; + struct vm_page *pg; + + /* + * compute number of pages that can go in the free pool + */ + + freepages = 0; + for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) + freepages += + (vm_physmem[lcv].avail_end - vm_physmem[lcv].avail_start); + + /* + * compute number of buckets needed for this number of pages + */ + + bucketcount = 1; + while (bucketcount < freepages) + bucketcount = bucketcount * 2; + + /* + * malloc new buckets + */ + + MALLOC(newbuckets, struct pglist *, sizeof(struct pglist) * bucketcount, + M_VMPBUCKET, M_NOWAIT); + if (newbuckets == NULL) { + printf("vm_page_physrehash: WARNING: could not grow page " + "hash table\n"); + return; + } + for (lcv = 0 ; lcv < bucketcount ; lcv++) + TAILQ_INIT(&newbuckets[lcv]); + + /* + * now replace the old buckets with the new ones and rehash everything + */ + + s = splimp(); + simple_lock(&uvm.hashlock); + /* swap old for new ... */ + oldbuckets = uvm.page_hash; + oldcount = uvm.page_nhash; + uvm.page_hash = newbuckets; + uvm.page_nhash = bucketcount; + uvm.page_hashmask = bucketcount - 1; /* power of 2 */ + + /* ... and rehash */ + for (lcv = 0 ; lcv < oldcount ; lcv++) { + while ((pg = oldbuckets[lcv].tqh_first) != NULL) { + TAILQ_REMOVE(&oldbuckets[lcv], pg, hashq); + TAILQ_INSERT_TAIL( + &uvm.page_hash[uvm_pagehash(pg->uobject, pg->offset)], + pg, hashq); + } + } + simple_unlock(&uvm.hashlock); + splx(s); + + /* + * free old bucket array if we malloc'd it previously + */ + + if (oldbuckets != &uvm_bootbucket) + FREE(oldbuckets, M_VMPBUCKET); + + /* + * done + */ + return; +} + + +#if 1 /* XXXCDC: TMP TMP TMP DEBUG DEBUG DEBUG */ + +void uvm_page_physdump __P((void)); /* SHUT UP GCC */ + +/* call from DDB */ +void +uvm_page_physdump() +{ + int lcv; + + printf("rehash: physical memory config [segs=%d of %d]:\n", + vm_nphysseg, VM_PHYSSEG_MAX); + for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) + printf("0x%lx->0x%lx [0x%lx->0x%lx]\n", vm_physmem[lcv].start, + vm_physmem[lcv].end, vm_physmem[lcv].avail_start, + vm_physmem[lcv].avail_end); + printf("STRATEGY = "); + switch (VM_PHYSSEG_STRAT) { + case VM_PSTRAT_RANDOM: printf("RANDOM\n"); break; + case VM_PSTRAT_BSEARCH: printf("BSEARCH\n"); break; + case VM_PSTRAT_BIGFIRST: printf("BIGFIRST\n"); break; + default: printf("<<UNKNOWN>>!!!!\n"); + } + printf("number of buckets = %d\n", uvm.page_nhash); +} +#endif + +/* + * uvm_pagealloc_strat: allocate vm_page from a particular free list. 
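One step back before the allocator: uvm_page_rehash() above grows the bucket array to the smallest power of two that covers the free-page count, swaps it in under the hash lock, and re-files every page under the new mask. A sketch of that grow-and-rehash step, minus the locking; it assumes the old array was heap allocated (the real code takes care not to free the static bootstrap bucket) and uses a precomputed key field in place of uvm_pagehash(obj, offset).

#include <stdlib.h>
#include <sys/queue.h>

struct page {
	TAILQ_ENTRY(page) hashq;
	unsigned long	  key;		/* stands in for hash(obj, offset) */
};
TAILQ_HEAD(bucket, page);

/*
 * Grow a power-of-two bucket array to at least "want" buckets and re-file
 * every page under the new mask.  Returns the new array, or the old one
 * unchanged if the allocation fails.
 */
static struct bucket *
rehash(struct bucket *old, unsigned long *nhash, unsigned long *hashmask,
    unsigned long want)
{
	unsigned long count = 1, i;
	struct bucket *new;
	struct page *pg;

	while (count < want)
		count *= 2;		/* power of two: mask is count - 1 */

	new = malloc(count * sizeof(*new));
	if (new == NULL)
		return old;		/* keep the smaller table */
	for (i = 0; i < count; i++)
		TAILQ_INIT(&new[i]);

	for (i = 0; i < *nhash; i++) {
		while ((pg = TAILQ_FIRST(&old[i])) != NULL) {
			TAILQ_REMOVE(&old[i], pg, hashq);
			TAILQ_INSERT_TAIL(&new[pg->key & (count - 1)],
			    pg, hashq);
		}
	}
	free(old);			/* assumes old was heap allocated */

	*nhash = count;
	*hashmask = count - 1;
	return new;
}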
+ * + * => return null if no pages free + * => wake up pagedaemon if number of free pages drops below low water mark + * => if obj != NULL, obj must be locked (to put in hash) + * => if anon != NULL, anon must be locked (to put in anon) + * => only one of obj or anon can be non-null + * => caller must activate/deactivate page if it is not wired. + * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL. + */ + +struct vm_page * +uvm_pagealloc_strat(obj, off, anon, strat, free_list) + struct uvm_object *obj; + vaddr_t off; + struct vm_anon *anon; + int strat, free_list; +{ + int lcv, s; + struct vm_page *pg; + struct pglist *freeq; + +#ifdef DIAGNOSTIC + /* sanity check */ + if (obj && anon) + panic("uvm_pagealloc: obj and anon != NULL"); +#endif + + s = splimp(); + + uvm_lock_fpageq(); /* lock free page queue */ + + /* + * check to see if we need to generate some free pages waking + * the pagedaemon. + */ + + if (uvmexp.free < uvmexp.freemin || (uvmexp.free < uvmexp.freetarg && + uvmexp.inactive < uvmexp.inactarg)) + thread_wakeup(&uvm.pagedaemon); + + /* + * fail if any of these conditions is true: + * [1] there really are no free pages, or + * [2] only kernel "reserved" pages remain and + * the page isn't being allocated to a kernel object. + * [3] only pagedaemon "reserved" pages remain and + * the requestor isn't the pagedaemon. + */ + + if ((uvmexp.free <= uvmexp.reserve_kernel && + !(obj && obj->uo_refs == UVM_OBJ_KERN)) || + (uvmexp.free <= uvmexp.reserve_pagedaemon && + !(obj == uvmexp.kmem_object && curproc == uvm.pagedaemon_proc))) + goto fail; + + again: + switch (strat) { + case UVM_PGA_STRAT_NORMAL: + /* Check all freelists in descending priority order. */ + for (lcv = 0; lcv < VM_NFREELIST; lcv++) { + freeq = &uvm.page_free[lcv]; + if ((pg = freeq->tqh_first) != NULL) + goto gotit; + } + + /* No pages free! */ + goto fail; + + case UVM_PGA_STRAT_ONLY: + case UVM_PGA_STRAT_FALLBACK: + /* Attempt to allocate from the specified free list. */ +#ifdef DIAGNOSTIC + if (free_list >= VM_NFREELIST || free_list < 0) + panic("uvm_pagealloc_strat: bad free list %d", + free_list); +#endif + freeq = &uvm.page_free[free_list]; + if ((pg = freeq->tqh_first) != NULL) + goto gotit; + + /* Fall back, if possible. */ + if (strat == UVM_PGA_STRAT_FALLBACK) { + strat = UVM_PGA_STRAT_NORMAL; + goto again; + } + + /* No pages free! 
*/ + goto fail; + + default: + panic("uvm_pagealloc_strat: bad strat %d", strat); + /* NOTREACHED */ + } + + gotit: + TAILQ_REMOVE(freeq, pg, pageq); + uvmexp.free--; + + uvm_unlock_fpageq(); /* unlock free page queue */ + splx(s); + + pg->offset = off; + pg->uobject = obj; + pg->uanon = anon; + pg->flags = PG_BUSY|PG_CLEAN|PG_FAKE; + pg->version++; + pg->wire_count = 0; + pg->loan_count = 0; + if (anon) { + anon->u.an_page = pg; + pg->pqflags = PQ_ANON; + } else { + if (obj) + uvm_pageinsert(pg); + pg->pqflags = 0; + } +#if defined(UVM_PAGE_TRKOWN) + pg->owner_tag = NULL; +#endif + UVM_PAGE_OWN(pg, "new alloc"); + + return(pg); + + fail: + uvm_unlock_fpageq(); + splx(s); + return (NULL); +} + +/* + * uvm_pagerealloc: reallocate a page from one object to another + * + * => both objects must be locked + */ + +void +uvm_pagerealloc(pg, newobj, newoff) + struct vm_page *pg; + struct uvm_object *newobj; + vaddr_t newoff; +{ + /* + * remove it from the old object + */ + + if (pg->uobject) { + uvm_pageremove(pg); + } + + /* + * put it in the new object + */ + + if (newobj) { + pg->uobject = newobj; + pg->offset = newoff; + pg->version++; + uvm_pageinsert(pg); + } + + return; +} + + +/* + * uvm_pagefree: free page + * + * => erase page's identity (i.e. remove from hash/object) + * => put page on free list + * => caller must lock owning object (either anon or uvm_object) + * => caller must lock page queues + * => assumes all valid mappings of pg are gone + */ + +void uvm_pagefree(pg) + +struct vm_page *pg; + +{ + int s; + int saved_loan_count = pg->loan_count; + + /* + * if the page was an object page (and thus "TABLED"), remove it + * from the object. + */ + + if (pg->flags & PG_TABLED) { + + /* + * if the object page is on loan we are going to drop ownership. + * it is possible that an anon will take over as owner for this + * page later on. the anon will want a !PG_CLEAN page so that + * it knows it needs to allocate swap if it wants to page the + * page out. + */ + + if (saved_loan_count) + pg->flags &= ~PG_CLEAN; /* in case an anon takes over */ + + uvm_pageremove(pg); + + /* + * if our page was on loan, then we just lost control over it + * (in fact, if it was loaned to an anon, the anon may have + * already taken over ownership of the page by now and thus + * changed the loan_count [e.g. in uvmfault_anonget()]) we just + * return (when the last loan is dropped, then the page can be + * freed by whatever was holding the last loan). + */ + if (saved_loan_count) + return; + + } else if (saved_loan_count && (pg->pqflags & PQ_ANON)) { + + /* + * if our page is owned by an anon and is loaned out to the + * kernel then we just want to drop ownership and return. + * the kernel must free the page when all its loans clear ... + * note that the kernel can't change the loan status of our + * page as long as we are holding PQ lock. 
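+		 *
+		 * (e.g.: an anon is being torn down while the kernel still
+		 * holds a loan on its page for an i/o in progress.  we clear
+		 * PQ_ANON and pg->uanon below and return without touching
+		 * the free list; the page only gets freed later, when the
+		 * last loan on it is dropped.)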
+ */ + pg->pqflags &= ~PQ_ANON; + pg->uanon = NULL; + return; + } + +#ifdef DIAGNOSTIC + if (saved_loan_count) { + printf("uvm_pagefree: warning: freeing page with a loan " + "count of %d\n", saved_loan_count); + panic("uvm_pagefree: loan count"); + } +#endif + + + /* + * now remove the page from the queues + */ + + if (pg->pqflags & PQ_ACTIVE) { + TAILQ_REMOVE(&uvm.page_active, pg, pageq); + pg->pqflags &= ~PQ_ACTIVE; + uvmexp.active--; + } + if (pg->pqflags & PQ_INACTIVE) { + if (pg->pqflags & PQ_SWAPBACKED) + TAILQ_REMOVE(&uvm.page_inactive_swp, pg, pageq); + else + TAILQ_REMOVE(&uvm.page_inactive_obj, pg, pageq); + pg->pqflags &= ~PQ_INACTIVE; + uvmexp.inactive--; + } + + /* + * if the page was wired, unwire it now. + */ + if (pg->wire_count) + { + pg->wire_count = 0; + uvmexp.wired--; + } + + /* + * and put on free queue + */ + + s = splimp(); + uvm_lock_fpageq(); + TAILQ_INSERT_TAIL(&uvm.page_free[uvm_page_lookup_freelist(pg)], + pg, pageq); + pg->pqflags = PQ_FREE; +#ifdef DEBUG + pg->uobject = (void *)0xdeadbeef; + pg->offset = 0xdeadbeef; + pg->uanon = (void *)0xdeadbeef; +#endif + uvmexp.free++; + uvm_unlock_fpageq(); + splx(s); +} + +#if defined(UVM_PAGE_TRKOWN) +/* + * uvm_page_own: set or release page ownership + * + * => this is a debugging function that keeps track of who sets PG_BUSY + * and where they do it. it can be used to track down problems + * such a process setting "PG_BUSY" and never releasing it. + * => page's object [if any] must be locked + * => if "tag" is NULL then we are releasing page ownership + */ +void +uvm_page_own(pg, tag) + struct vm_page *pg; + char *tag; +{ + /* gain ownership? */ + if (tag) { + if (pg->owner_tag) { + printf("uvm_page_own: page %p already owned " + "by proc %d [%s]\n", pg, + pg->owner, pg->owner_tag); + panic("uvm_page_own"); + } + pg->owner = (curproc) ? curproc->p_pid : (pid_t) -1; + pg->owner_tag = tag; + return; + } + + /* drop ownership */ + if (pg->owner_tag == NULL) { + printf("uvm_page_own: dropping ownership of an non-owned " + "page (%p)\n", pg); + panic("uvm_page_own"); + } + pg->owner_tag = NULL; + return; +} +#endif diff --git a/sys/uvm/uvm_page.h b/sys/uvm/uvm_page.h new file mode 100644 index 00000000000..dd40fc5bee1 --- /dev/null +++ b/sys/uvm/uvm_page.h @@ -0,0 +1,132 @@ +/* $NetBSD: uvm_page.h,v 1.10 1998/08/13 02:11:02 eeh Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * Copyright (c) 1991, 1993, The Regents of the University of California. + * + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. 
Cranor, + * Washington University, the University of California, Berkeley and + * its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)vm_page.h 7.3 (Berkeley) 4/21/91 + * from: Id: uvm_page.h,v 1.1.2.6 1998/02/04 02:31:42 chuck Exp + * + * + * Copyright (c) 1987, 1990 Carnegie-Mellon University. + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and + * its documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie the + * rights to redistribute these changes. 
+ */ + +#ifndef _UVM_UVM_PAGE_H_ +#define _UVM_UVM_PAGE_H_ + +/* + * uvm_page.h + */ + +/* + * macros + */ + +#define uvm_lock_pageq() simple_lock(&uvm.pageqlock) +#define uvm_unlock_pageq() simple_unlock(&uvm.pageqlock) +#define uvm_lock_fpageq() simple_lock(&uvm.fpageqlock) +#define uvm_unlock_fpageq() simple_unlock(&uvm.fpageqlock) + +#define uvm_pagehash(obj,off) \ + (((unsigned long)obj+(unsigned long)atop(off)) & uvm.page_hashmask) + +/* + * handle inline options + */ + +#ifdef UVM_PAGE_INLINE +#define PAGE_INLINE static __inline +#else +#define PAGE_INLINE /* nothing */ +#endif /* UVM_PAGE_INLINE */ + +/* + * prototypes: the following prototypes define the interface to pages + */ + +void uvm_page_init __P((vaddr_t *, vaddr_t *)); +#if defined(UVM_PAGE_TRKOWN) +void uvm_page_own __P((struct vm_page *, char *)); +#endif +#if !defined(PMAP_STEAL_MEMORY) +boolean_t uvm_page_physget __P((paddr_t *)); +#endif +void uvm_page_rehash __P((void)); + +PAGE_INLINE void uvm_pageactivate __P((struct vm_page *)); +vaddr_t uvm_pageboot_alloc __P((vsize_t)); +PAGE_INLINE void uvm_pagecopy __P((struct vm_page *, struct vm_page *)); +PAGE_INLINE void uvm_pagedeactivate __P((struct vm_page *)); +void uvm_pagefree __P((struct vm_page *)); +PAGE_INLINE struct vm_page *uvm_pagelookup + __P((struct uvm_object *, vaddr_t)); +void uvm_pageremove __P((struct vm_page *)); +/* uvm_pagerename: not needed */ +PAGE_INLINE void uvm_pageunwire __P((struct vm_page *)); +PAGE_INLINE void uvm_pagewait __P((struct vm_page *, int)); +PAGE_INLINE void uvm_pagewake __P((struct vm_page *)); +PAGE_INLINE void uvm_pagewire __P((struct vm_page *)); +PAGE_INLINE void uvm_pagezero __P((struct vm_page *)); + +PAGE_INLINE int uvm_page_lookup_freelist __P((struct vm_page *)); + +#endif /* _UVM_UVM_PAGE_H_ */ diff --git a/sys/uvm/uvm_page_i.h b/sys/uvm/uvm_page_i.h new file mode 100644 index 00000000000..5a5671a3f4e --- /dev/null +++ b/sys/uvm/uvm_page_i.h @@ -0,0 +1,292 @@ +/* $NetBSD: uvm_page_i.h,v 1.8 1998/08/13 02:11:02 eeh Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * Copyright (c) 1991, 1993, The Regents of the University of California. + * + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor, + * Washington University, the University of California, Berkeley and + * its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)vm_page.c 8.3 (Berkeley) 3/21/94 + * from: Id: uvm_page_i.h,v 1.1.2.7 1998/01/05 00:26:02 chuck Exp + * + * + * Copyright (c) 1987, 1990 Carnegie-Mellon University. + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and + * its documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie the + * rights to redistribute these changes. + */ + +#ifndef _UVM_UVM_PAGE_I_H_ +#define _UVM_UVM_PAGE_I_H_ + +/* + * uvm_page_i.h + */ + +/* + * inline functions [maybe] + */ + +#if defined(UVM_PAGE_INLINE) || defined(UVM_PAGE) + +/* + * uvm_pagelookup: look up a page + * + * => caller should lock object to keep someone from pulling the page + * out from under it + */ + +struct vm_page * +uvm_pagelookup(obj, off) + struct uvm_object *obj; + vaddr_t off; +{ + struct vm_page *pg; + struct pglist *buck; + int s; + + buck = &uvm.page_hash[uvm_pagehash(obj,off)]; + + s = splimp(); + simple_lock(&uvm.hashlock); + for (pg = buck->tqh_first ; pg != NULL ; pg = pg->hashq.tqe_next) { + if (pg->uobject == obj && pg->offset == off) { + simple_unlock(&uvm.hashlock); + splx(s); + return(pg); + } + } + simple_unlock(&uvm.hashlock); + splx(s); + return(NULL); +} + +/* + * uvm_pagewire: wire the page, thus removing it from the daemon's grasp + * + * => caller must lock page queues + */ + +PAGE_INLINE void +uvm_pagewire(pg) + struct vm_page *pg; +{ + + if (pg->wire_count == 0) { + if (pg->pqflags & PQ_ACTIVE) { + TAILQ_REMOVE(&uvm.page_active, pg, pageq); + pg->pqflags &= ~PQ_ACTIVE; + uvmexp.active--; + } + if (pg->pqflags & PQ_INACTIVE) { + if (pg->pqflags & PQ_SWAPBACKED) + TAILQ_REMOVE(&uvm.page_inactive_swp, pg, pageq); + else + TAILQ_REMOVE(&uvm.page_inactive_obj, pg, pageq); + pg->pqflags &= ~PQ_INACTIVE; + uvmexp.inactive--; + } + uvmexp.wired++; + } + pg->wire_count++; +} + +/* + * uvm_pageunwire: unwire the page. + * + * => activate if wire count goes to zero. 
+ * => caller must lock page queues + */ + +PAGE_INLINE void +uvm_pageunwire(pg) + struct vm_page *pg; +{ + + pg->wire_count--; + if (pg->wire_count == 0) { + TAILQ_INSERT_TAIL(&uvm.page_active, pg, pageq); + uvmexp.active++; + pg->pqflags |= PQ_ACTIVE; + uvmexp.wired--; + } +} + +/* + * uvm_pagedeactivate: deactivate page -- no pmaps have access to page + * + * => caller must lock page queues + * => caller must check to make sure page is not wired + * => object that page belongs to must be locked (so we can adjust pg->flags) + */ + +PAGE_INLINE void +uvm_pagedeactivate(pg) + struct vm_page *pg; +{ + if (pg->pqflags & PQ_ACTIVE) { + TAILQ_REMOVE(&uvm.page_active, pg, pageq); + pg->pqflags &= ~PQ_ACTIVE; + uvmexp.active--; + } + if ((pg->pqflags & PQ_INACTIVE) == 0) { +#ifdef DIAGNOSTIC + if (pg->wire_count) + panic("uvm_pagedeactivate: caller did not check " + "wire count"); +#endif + if (pg->pqflags & PQ_SWAPBACKED) + TAILQ_INSERT_TAIL(&uvm.page_inactive_swp, pg, pageq); + else + TAILQ_INSERT_TAIL(&uvm.page_inactive_obj, pg, pageq); + pg->pqflags |= PQ_INACTIVE; + uvmexp.inactive++; + pmap_clear_reference(PMAP_PGARG(pg)); + if (pmap_is_modified(PMAP_PGARG(pg))) + pg->flags &= ~PG_CLEAN; + } +} + +/* + * uvm_pageactivate: activate page + * + * => caller must lock page queues + */ + +PAGE_INLINE void +uvm_pageactivate(pg) + struct vm_page *pg; +{ + if (pg->pqflags & PQ_INACTIVE) { + if (pg->pqflags & PQ_SWAPBACKED) + TAILQ_REMOVE(&uvm.page_inactive_swp, pg, pageq); + else + TAILQ_REMOVE(&uvm.page_inactive_obj, pg, pageq); + pg->pqflags &= ~PQ_INACTIVE; + uvmexp.inactive--; + } + if (pg->wire_count == 0) { + + /* + * if page is already active, remove it from list so we + * can put it at tail. if it wasn't active, then mark + * it active and bump active count + */ + if (pg->pqflags & PQ_ACTIVE) + TAILQ_REMOVE(&uvm.page_active, pg, pageq); + else { + pg->pqflags |= PQ_ACTIVE; + uvmexp.active++; + } + + TAILQ_INSERT_TAIL(&uvm.page_active, pg, pageq); + } +} + +/* + * uvm_pagezero: zero fill a page + * + * => if page is part of an object then the object should be locked + * to protect pg->flags. + */ + +PAGE_INLINE void +uvm_pagezero(pg) + struct vm_page *pg; +{ + + pg->flags &= ~PG_CLEAN; + pmap_zero_page(VM_PAGE_TO_PHYS(pg)); +} + +/* + * uvm_pagecopy: copy a page + * + * => if page is part of an object then the object should be locked + * to protect pg->flags. + */ + +PAGE_INLINE void +uvm_pagecopy(src, dst) + struct vm_page *src, *dst; +{ + + dst->flags &= ~PG_CLEAN; + pmap_copy_page(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst)); +} + +/* + * uvm_page_lookup_freelist: look up the free list for the specified page + */ + +PAGE_INLINE int +uvm_page_lookup_freelist(pg) + struct vm_page *pg; +{ + int lcv; + + lcv = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), NULL); +#ifdef DIAGNOSTIC + if (lcv == -1) + panic("uvm_page_lookup_freelist: unable to locate physseg"); +#endif + return (vm_physmem[lcv].free_list); +} + +#endif /* defined(UVM_PAGE_INLINE) || defined(UVM_PAGE) */ + +#endif /* _UVM_UVM_PAGE_I_H_ */ diff --git a/sys/uvm/uvm_pager.c b/sys/uvm/uvm_pager.c new file mode 100644 index 00000000000..1b8c8a36d3e --- /dev/null +++ b/sys/uvm/uvm_pager.c @@ -0,0 +1,762 @@ +/* $NetBSD: uvm_pager.c,v 1.14 1999/01/22 08:00:35 chs Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * from: Id: uvm_pager.c,v 1.1.2.23 1998/02/02 20:38:06 chuck Exp + */ + +/* + * uvm_pager.c: generic functions used to assist the pagers. + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/malloc.h> + +#include <vm/vm.h> +#include <vm/vm_page.h> +#include <vm/vm_kern.h> + +#define UVM_PAGER +#include <uvm/uvm.h> + +/* + * list of uvm pagers in the system + */ + +extern struct uvm_pagerops aobj_pager; +extern struct uvm_pagerops uvm_deviceops; +extern struct uvm_pagerops uvm_vnodeops; + +struct uvm_pagerops *uvmpagerops[] = { + &aobj_pager, + &uvm_deviceops, + &uvm_vnodeops, +}; + +/* + * the pager map: provides KVA for I/O + */ + +#define PAGER_MAP_SIZE (4 * 1024 * 1024) +vm_map_t pager_map; /* XXX */ +simple_lock_data_t pager_map_wanted_lock; +boolean_t pager_map_wanted; /* locked by pager map */ + + +/* + * uvm_pager_init: init pagers (at boot time) + */ + +void +uvm_pager_init() +{ + int lcv; + + /* + * init pager map + */ + + pager_map = uvm_km_suballoc(kernel_map, &uvm.pager_sva, &uvm.pager_eva, + PAGER_MAP_SIZE, FALSE, FALSE, NULL); + simple_lock_init(&pager_map_wanted_lock); + pager_map_wanted = FALSE; + + /* + * init ASYNC I/O queue + */ + + TAILQ_INIT(&uvm.aio_done); + + /* + * call pager init functions + */ + for (lcv = 0 ; lcv < sizeof(uvmpagerops)/sizeof(struct uvm_pagerops *); + lcv++) { + if (uvmpagerops[lcv]->pgo_init) + uvmpagerops[lcv]->pgo_init(); + } +} + +/* + * uvm_pagermapin: map pages into KVA (pager_map) for I/O that needs mappings + * + * we basically just map in a blank map entry to reserve the space in the + * map and then use pmap_enter() to put the mappings in by hand. 
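+ *
+ * an illustrative (not prescriptive) call sequence from a pager's
+ * synchronous i/o path looks like:
+ *
+ *	kva = uvm_pagermapin(pps, npages, NULL, M_WAITOK);
+ *	... do device i/o on [kva, kva + (npages << PAGE_SHIFT)) ...
+ *	uvm_pagermapout(kva, npages);
+ *
+ * passing a non-NULL "aiop" instead allocates a uvm_aiodesc that the
+ * caller can use to track the async i/o.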
+ */ + +vaddr_t +uvm_pagermapin(pps, npages, aiop, waitf) + struct vm_page **pps; + int npages; + struct uvm_aiodesc **aiop; /* OUT */ + int waitf; +{ + vsize_t size; + vaddr_t kva; + struct uvm_aiodesc *aio; +#if !defined(PMAP_NEW) + vaddr_t cva; + struct vm_page *pp; +#endif + UVMHIST_FUNC("uvm_pagermapin"); UVMHIST_CALLED(maphist); + + UVMHIST_LOG(maphist,"(pps=0x%x, npages=%d, aiop=0x%x, waitf=%d)", + pps, npages, aiop, waitf); + +ReStart: + if (aiop) { + MALLOC(aio, struct uvm_aiodesc *, sizeof(*aio), M_TEMP, waitf); + if (aio == NULL) + return(0); + *aiop = aio; + } else { + aio = NULL; + } + + size = npages << PAGE_SHIFT; + kva = NULL; /* let system choose VA */ + + if (uvm_map(pager_map, &kva, size, NULL, + UVM_UNKNOWN_OFFSET, UVM_FLAG_NOMERGE) != KERN_SUCCESS) { + if (waitf == M_NOWAIT) { + if (aio) + FREE(aio, M_TEMP); + UVMHIST_LOG(maphist,"<- NOWAIT failed", 0,0,0,0); + return(NULL); + } + simple_lock(&pager_map_wanted_lock); + pager_map_wanted = TRUE; + UVMHIST_LOG(maphist, " SLEEPING on pager_map",0,0,0,0); + UVM_UNLOCK_AND_WAIT(pager_map, &pager_map_wanted_lock, FALSE, + "pager_map",0); + goto ReStart; + } + +#if defined(PMAP_NEW) + /* + * XXX: (ab)using the pmap module to store state info for us. + * (pmap stores the PAs... we fetch them back later and convert back + * to pages with PHYS_TO_VM_PAGE). + */ + pmap_kenter_pgs(kva, pps, npages); + +#else /* PMAP_NEW */ + + /* got it */ + for (cva = kva ; size != 0 ; size -= PAGE_SIZE, cva += PAGE_SIZE) { + pp = *pps++; +#ifdef DEBUG + if ((pp->flags & PG_BUSY) == 0) + panic("uvm_pagermapin: page not busy"); +#endif + + pmap_enter(vm_map_pmap(pager_map), cva, VM_PAGE_TO_PHYS(pp), + VM_PROT_DEFAULT, TRUE); + } + +#endif /* PMAP_NEW */ + + UVMHIST_LOG(maphist, "<- done (KVA=0x%x)", kva,0,0,0); + return(kva); +} + +/* + * uvm_pagermapout: remove pager_map mapping + * + * we remove our mappings by hand and then remove the mapping (waking + * up anyone wanting space). + */ + +void +uvm_pagermapout(kva, npages) + vaddr_t kva; + int npages; +{ + vsize_t size = npages << PAGE_SHIFT; + vm_map_entry_t entries; + UVMHIST_FUNC("uvm_pagermapout"); UVMHIST_CALLED(maphist); + + UVMHIST_LOG(maphist, " (kva=0x%x, npages=%d)", kva, npages,0,0); + + /* + * duplicate uvm_unmap, but add in pager_map_wanted handling. + */ + + vm_map_lock(pager_map); + (void) uvm_unmap_remove(pager_map, kva, kva + size, &entries); + simple_lock(&pager_map_wanted_lock); + if (pager_map_wanted) { + pager_map_wanted = FALSE; + wakeup(pager_map); + } + simple_unlock(&pager_map_wanted_lock); + vm_map_unlock(pager_map); + if (entries) + uvm_unmap_detach(entries, 0); + + UVMHIST_LOG(maphist,"<- done",0,0,0,0); +} + +/* + * uvm_mk_pcluster + * + * generic "make 'pager put' cluster" function. a pager can either + * [1] set pgo_mk_pcluster to NULL (never cluster), [2] set it to this + * generic function, or [3] set it to a pager specific function. + * + * => caller must lock object _and_ pagequeues (since we need to look + * at active vs. inactive bits, etc.) + * => caller must make center page busy and write-protect it + * => we mark all cluster pages busy for the caller + * => the caller must unbusy all pages (and check wanted/released + * status if it drops the object lock) + * => flags: + * PGO_ALLPAGES: all pages in object are valid targets + * !PGO_ALLPAGES: use "lo" and "hi" to limit range of cluster + * PGO_DOACTCLUST: include active pages in cluster. + * NOTE: the caller should clear PG_CLEANCHK bits if PGO_DOACTCLUST. 
+ * PG_CLEANCHK is only a hint, but clearing will help reduce + * the number of calls we make to the pmap layer. + */ + +struct vm_page ** +uvm_mk_pcluster(uobj, pps, npages, center, flags, mlo, mhi) + struct uvm_object *uobj; /* IN */ + struct vm_page **pps, *center; /* IN/OUT, IN */ + int *npages, flags; /* IN/OUT, IN */ + vaddr_t mlo, mhi; /* IN (if !PGO_ALLPAGES) */ +{ + struct vm_page **ppsp, *pclust; + vaddr_t lo, hi, curoff; + int center_idx, forward; + UVMHIST_FUNC("uvm_mk_pcluster"); UVMHIST_CALLED(maphist); + + /* + * center page should already be busy and write protected. XXX: + * suppose page is wired? if we lock, then a process could + * fault/block on it. if we don't lock, a process could write the + * pages in the middle of an I/O. (consider an msync()). let's + * lock it for now (better to delay than corrupt data?). + */ + + /* + * get cluster boundaries, check sanity, and apply our limits as well. + */ + + uobj->pgops->pgo_cluster(uobj, center->offset, &lo, &hi); + if ((flags & PGO_ALLPAGES) == 0) { + if (lo < mlo) + lo = mlo; + if (hi > mhi) + hi = mhi; + } + if ((hi - lo) >> PAGE_SHIFT > *npages) { /* pps too small, bail out! */ +#ifdef DIAGNOSTIC + printf("uvm_mk_pcluster: provided page array too small (fixed)\n"); +#endif + pps[0] = center; + *npages = 1; + return(pps); + } + + /* + * now determine the center and attempt to cluster around the + * edges + */ + + center_idx = (center->offset - lo) >> PAGE_SHIFT; + pps[center_idx] = center; /* plug in the center page */ + ppsp = &pps[center_idx]; + *npages = 1; + + /* + * attempt to cluster around the left [backward], and then + * the right side [forward]. + * + * note that for inactive pages (pages that have been deactivated) + * there are no valid mappings and PG_CLEAN should be up to date. + * [i.e. there is no need to query the pmap with pmap_is_modified + * since there are no mappings]. + */ + + for (forward = 0 ; forward <= 1 ; forward++) { + + curoff = center->offset + (forward ? PAGE_SIZE : -PAGE_SIZE); + for ( ;(forward == 0 && curoff >= lo) || + (forward && curoff < hi); + curoff += (forward ? 1 : -1) << PAGE_SHIFT) { + + pclust = uvm_pagelookup(uobj, curoff); /* lookup page */ + if (pclust == NULL) + break; /* no page */ + /* handle active pages */ + /* NOTE: inactive pages don't have pmap mappings */ + if ((pclust->pqflags & PQ_INACTIVE) == 0) { + if ((flags & PGO_DOACTCLUST) == 0) + /* dont want mapped pages at all */ + break; + + /* make sure "clean" bit is sync'd */ + if ((pclust->flags & PG_CLEANCHK) == 0) { + if ((pclust->flags & (PG_CLEAN|PG_BUSY)) + == PG_CLEAN && + pmap_is_modified(PMAP_PGARG(pclust))) + pclust->flags &= ~PG_CLEAN; + /* now checked */ + pclust->flags |= PG_CLEANCHK; + } + } + /* is page available for cleaning and does it need it */ + if ((pclust->flags & (PG_CLEAN|PG_BUSY)) != 0) + break; /* page is already clean or is busy */ + + /* yes! enroll the page in our array */ + pclust->flags |= PG_BUSY; /* busy! */ + UVM_PAGE_OWN(pclust, "uvm_mk_pcluster"); + /* XXX: protect wired page? see above comment. */ + pmap_page_protect(PMAP_PGARG(pclust), VM_PROT_READ); + if (!forward) { + ppsp--; /* back up one page */ + *ppsp = pclust; + } else { + /* move forward one page */ + ppsp[*npages] = pclust; + } + *npages = *npages + 1; + } + } + + /* + * done! return the cluster array to the caller!!! 
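+	 *
+	 * (note: the pointer we return may point into the middle of the
+	 * caller's "pps" array.  ppsp[0] is the lowest-offset page in the
+	 * cluster, ppsp[*npages - 1] the highest, and the original center
+	 * page sits somewhere in that range.)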
+ */ + + UVMHIST_LOG(maphist, "<- done",0,0,0,0); + return(ppsp); +} + + +/* + * uvm_shareprot: generic share protect routine + * + * => caller must lock map entry's map + * => caller must lock object pointed to by map entry + */ + +void +uvm_shareprot(entry, prot) + vm_map_entry_t entry; + vm_prot_t prot; +{ + struct uvm_object *uobj = entry->object.uvm_obj; + struct vm_page *pp; + vaddr_t start, stop; + UVMHIST_FUNC("uvm_shareprot"); UVMHIST_CALLED(maphist); + + if (UVM_ET_ISSUBMAP(entry)) + panic("uvm_shareprot: non-object attached"); + + start = entry->offset; + stop = start + (entry->end - entry->start); + + /* + * traverse list of pages in object. if page in range, pmap_prot it + */ + + for (pp = uobj->memq.tqh_first ; pp != NULL ; pp = pp->listq.tqe_next) { + if (pp->offset >= start && pp->offset < stop) + pmap_page_protect(PMAP_PGARG(pp), prot); + } + UVMHIST_LOG(maphist, "<- done",0,0,0,0); +} + +/* + * uvm_pager_put: high level pageout routine + * + * we want to pageout page "pg" to backing store, clustering if + * possible. + * + * => page queues must be locked by caller + * => if page is not swap-backed, then "uobj" points to the object + * backing it. this object should be locked by the caller. + * => if page is swap-backed, then "uobj" should be NULL. + * => "pg" should be PG_BUSY (by caller), and !PG_CLEAN + * for swap-backed memory, "pg" can be NULL if there is no page + * of interest [sometimes the case for the pagedaemon] + * => "ppsp_ptr" should point to an array of npages vm_page pointers + * for possible cluster building + * => flags (first two for non-swap-backed pages) + * PGO_ALLPAGES: all pages in uobj are valid targets + * PGO_DOACTCLUST: include "PQ_ACTIVE" pages as valid targets + * PGO_SYNCIO: do SYNC I/O (no async) + * PGO_PDFREECLUST: pagedaemon: drop cluster on successful I/O + * => start/stop: if (uobj && !PGO_ALLPAGES) limit targets to this range + * if (!uobj) start is the (daddr_t) of the starting swapblk + * => return state: + * 1. we return the VM_PAGER status code of the pageout + * 2. we return with the page queues unlocked + * 3. if (uobj != NULL) [!swap_backed] we return with + * uobj locked _only_ if PGO_PDFREECLUST is set + * AND result != VM_PAGER_PEND. in all other cases + * we return with uobj unlocked. [this is a hack + * that allows the pagedaemon to save one lock/unlock + * pair in the !swap_backed case since we have to + * lock the uobj to drop the cluster anyway] + * 4. on errors we always drop the cluster. thus, if we return + * !PEND, !OK, then the caller only has to worry about + * un-busying the main page (not the cluster pages). + * 5. on success, if !PGO_PDFREECLUST, we return the cluster + * with all pages busy (caller must un-busy and check + * wanted/released flags). + */ + +int +uvm_pager_put(uobj, pg, ppsp_ptr, npages, flags, start, stop) + struct uvm_object *uobj; /* IN */ + struct vm_page *pg, ***ppsp_ptr;/* IN, IN/OUT */ + int *npages; /* IN/OUT */ + int flags; /* IN */ + vaddr_t start, stop; /* IN, IN */ +{ + int result; + daddr_t swblk; + struct vm_page **ppsp = *ppsp_ptr; + + /* + * note that uobj is null if we are doing a swap-backed pageout. + * note that uobj is !null if we are doing normal object pageout. + * note that the page queues must be locked to cluster. + */ + + if (uobj) { /* if !swap-backed */ + + /* + * attempt to build a cluster for pageout using its + * make-put-cluster function (if it has one). 
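+		 *
+		 * (if the pager has no pgo_mk_pcluster hook we fall through
+		 * below with a one page "cluster" holding just pg.)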
+ */ + + if (uobj->pgops->pgo_mk_pcluster) { + ppsp = uobj->pgops->pgo_mk_pcluster(uobj, ppsp, + npages, pg, flags, start, stop); + *ppsp_ptr = ppsp; /* update caller's pointer */ + } else { + ppsp[0] = pg; + *npages = 1; + } + + swblk = 0; /* XXX: keep gcc happy */ + + } else { + + /* + * for swap-backed pageout, the caller (the pagedaemon) has + * already built the cluster for us. the starting swap + * block we are writing to has been passed in as "start." + * "pg" could be NULL if there is no page we are especially + * interested in (in which case the whole cluster gets dropped + * in the event of an error or a sync "done"). + */ + swblk = (daddr_t) start; + /* ppsp and npages should be ok */ + } + + /* now that we've clustered we can unlock the page queues */ + uvm_unlock_pageq(); + + /* + * now attempt the I/O. if we have a failure and we are + * clustered, we will drop the cluster and try again. + */ + +ReTry: + if (uobj) { + /* object is locked */ + result = uobj->pgops->pgo_put(uobj, ppsp, *npages, + flags & PGO_SYNCIO); + /* object is now unlocked */ + } else { + /* nothing locked */ + result = uvm_swap_put(swblk, ppsp, *npages, flags & PGO_SYNCIO); + /* nothing locked */ + } + + /* + * we have attempted the I/O. + * + * if the I/O was a success then: + * if !PGO_PDFREECLUST, we return the cluster to the + * caller (who must un-busy all pages) + * else we un-busy cluster pages for the pagedaemon + * + * if I/O is pending (async i/o) then we return the pending code. + * [in this case the async i/o done function must clean up when + * i/o is done...] + */ + + if (result == VM_PAGER_PEND || result == VM_PAGER_OK) { + if (result == VM_PAGER_OK && (flags & PGO_PDFREECLUST)) { + /* + * drop cluster and relock object (only if I/O is + * not pending) + */ + if (uobj) + /* required for dropcluster */ + simple_lock(&uobj->vmobjlock); + if (*npages > 1 || pg == NULL) + uvm_pager_dropcluster(uobj, pg, ppsp, npages, + PGO_PDFREECLUST, 0); + /* if (uobj): object still locked, as per + * return-state item #3 */ + } + return (result); + } + + /* + * a pager error occured. if we have clustered, we drop the + * cluster and try again. + */ + + if (*npages > 1 || pg == NULL) { + if (uobj) + simple_lock(&uobj->vmobjlock); + uvm_pager_dropcluster(uobj, pg, ppsp, npages, PGO_REALLOCSWAP, + swblk); + if (pg != NULL) + goto ReTry; + } + + /* + * a pager error occured (even after dropping the cluster, if there + * was one). give up! the caller only has one page ("pg") + * to worry about. + */ + + if (uobj && (flags & PGO_PDFREECLUST) != 0) + simple_lock(&uobj->vmobjlock); + return(result); +} + +/* + * uvm_pager_dropcluster: drop a cluster we have built (because we + * got an error, or, if PGO_PDFREECLUST we are un-busying the + * cluster pages on behalf of the pagedaemon). + * + * => uobj, if non-null, is a non-swap-backed object that is + * locked by the caller. we return with this object still + * locked. + * => page queues are not locked + * => pg is our page of interest (the one we clustered around, can be null) + * => ppsp/npages is our current cluster + * => flags: PGO_PDFREECLUST: pageout was a success: un-busy cluster + * pages on behalf of the pagedaemon. + * PGO_REALLOCSWAP: drop previously allocated swap slots for + * clustered swap-backed pages (except for "pg" if !NULL) + * "swblk" is the start of swap alloc (e.g. 
for ppsp[0]) + * [only meaningful if swap-backed (uobj == NULL)] + */ + + +void uvm_pager_dropcluster(uobj, pg, ppsp, npages, flags, swblk) + +struct uvm_object *uobj; /* IN */ +struct vm_page *pg, **ppsp; /* IN, IN/OUT */ +int *npages; /* IN/OUT */ +int flags; +int swblk; /* valid if (uobj == NULL && PGO_REALLOCSWAP) */ + +{ + int lcv; + boolean_t obj_is_alive; + struct uvm_object *saved_uobj; + + /* + * if we need to reallocate swap space for the cluster we are dropping + * (true if swap-backed and PGO_REALLOCSWAP) then free the old + * allocation now. save a block for "pg" if it is non-NULL. + * + * note that we will zap the object's pointer to swap in the "for" loop + * below... + */ + + if (uobj == NULL && (flags & PGO_REALLOCSWAP)) { + if (pg) + uvm_swap_free(swblk + 1, *npages - 1); + else + uvm_swap_free(swblk, *npages); + } + + /* + * drop all pages but "pg" + */ + + for (lcv = 0 ; lcv < *npages ; lcv++) { + + if (ppsp[lcv] == pg) /* skip "pg" */ + continue; + + /* + * if swap-backed, gain lock on object that owns page. note + * that PQ_ANON bit can't change as long as we are holding + * the PG_BUSY bit (so there is no need to lock the page + * queues to test it). + * + * once we have the lock, dispose of the pointer to swap, if + * requested + */ + if (!uobj) { + if (ppsp[lcv]->pqflags & PQ_ANON) { + simple_lock(&ppsp[lcv]->uanon->an_lock); + if (flags & PGO_REALLOCSWAP) + /* zap swap block */ + ppsp[lcv]->uanon->an_swslot = 0; + } else { + simple_lock(&ppsp[lcv]->uobject->vmobjlock); + if (flags & PGO_REALLOCSWAP) + uao_set_swslot(ppsp[lcv]->uobject, + ppsp[lcv]->offset >> PAGE_SHIFT, 0); + } + } + + /* did someone want the page while we had it busy-locked? */ + if (ppsp[lcv]->flags & PG_WANTED) + /* still holding obj lock */ + thread_wakeup(ppsp[lcv]); + + /* if page was released, release it. otherwise un-busy it */ + if (ppsp[lcv]->flags & PG_RELEASED) { + + if (ppsp[lcv]->pqflags & PQ_ANON) { + /* so that anfree will free */ + ppsp[lcv]->flags &= ~(PG_BUSY); + UVM_PAGE_OWN(ppsp[lcv], NULL); + + pmap_page_protect(PMAP_PGARG(ppsp[lcv]), + VM_PROT_NONE); /* be safe */ + simple_unlock(&ppsp[lcv]->uanon->an_lock); + /* kills anon and frees pg */ + uvm_anfree(ppsp[lcv]->uanon); + + continue; + } + + /* + * pgo_releasepg will dump the page for us + */ + +#ifdef DIAGNOSTIC + if (ppsp[lcv]->uobject->pgops->pgo_releasepg == NULL) + panic("uvm_pager_dropcluster: no releasepg " + "function"); +#endif + saved_uobj = ppsp[lcv]->uobject; + obj_is_alive = + saved_uobj->pgops->pgo_releasepg(ppsp[lcv], NULL); + +#ifdef DIAGNOSTIC + /* for normal objects, "pg" is still PG_BUSY by us, + * so obj can't die */ + if (uobj && !obj_is_alive) + panic("uvm_pager_dropcluster: object died " + "with active page"); +#endif + /* only unlock the object if it is still alive... */ + if (obj_is_alive && saved_uobj != uobj) + simple_unlock(&saved_uobj->vmobjlock); + + /* + * XXXCDC: suppose uobj died in the pgo_releasepg? + * how pass that + * info up to caller. we are currently ignoring it... + */ + + continue; /* next page */ + + } else { + ppsp[lcv]->flags &= ~(PG_BUSY|PG_WANTED); + UVM_PAGE_OWN(ppsp[lcv], NULL); + } + + /* + * if we are operating on behalf of the pagedaemon and we + * had a successful pageout update the page! + */ + if (flags & PGO_PDFREECLUST) { + /* XXX: with PMAP_NEW ref should already be clear, + * but don't trust! 
*/ + pmap_clear_reference(PMAP_PGARG(ppsp[lcv])); + pmap_clear_modify(PMAP_PGARG(ppsp[lcv])); + ppsp[lcv]->flags |= PG_CLEAN; + } + + /* if anonymous cluster, unlock object and move on */ + if (!uobj) { + if (ppsp[lcv]->pqflags & PQ_ANON) + simple_unlock(&ppsp[lcv]->uanon->an_lock); + else + simple_unlock(&ppsp[lcv]->uobject->vmobjlock); + } + + } + + /* + * drop to a cluster of 1 page ("pg") if requested + */ + + if (pg && (flags & PGO_PDFREECLUST) == 0) { + /* + * if we are not a successful pageout, we make a 1 page cluster. + */ + ppsp[0] = pg; + *npages = 1; + + /* + * assign new swap block to new cluster, if anon backed + */ + if (uobj == NULL && (flags & PGO_REALLOCSWAP)) { + if (pg->pqflags & PQ_ANON) { + simple_lock(&pg->uanon->an_lock); + pg->uanon->an_swslot = swblk; /* reassign */ + simple_unlock(&pg->uanon->an_lock); + } else { + simple_lock(&pg->uobject->vmobjlock); + uao_set_swslot(pg->uobject, + pg->offset >> PAGE_SHIFT, swblk); + simple_unlock(&pg->uobject->vmobjlock); + } + } + } +} diff --git a/sys/uvm/uvm_pager.h b/sys/uvm/uvm_pager.h new file mode 100644 index 00000000000..f48082e4b44 --- /dev/null +++ b/sys/uvm/uvm_pager.h @@ -0,0 +1,158 @@ +/* $NetBSD: uvm_pager.h,v 1.7 1998/08/13 02:11:03 eeh Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * from: Id: uvm_pager.h,v 1.1.2.14 1998/01/13 19:00:50 chuck Exp + */ + +#ifndef _UVM_UVM_PAGER_H_ +#define _UVM_UVM_PAGER_H_ + +/* + * uvm_pager.h + */ + +/* + * async pager i/o descriptor structure + */ + +TAILQ_HEAD(uvm_aiohead, uvm_aiodesc); + +struct uvm_aiodesc { + void (*aiodone) __P((struct uvm_aiodesc *)); + /* aio done function */ + vaddr_t kva; /* KVA of mapped page(s) */ + int npages; /* # of pages in I/O req */ + void *pd_ptr; /* pager-dependent pointer */ + TAILQ_ENTRY(uvm_aiodesc) aioq; /* linked list of aio's */ +}; + +/* + * pager ops + */ + +struct uvm_pagerops { + void (*pgo_init) __P((void));/* init pager */ + struct uvm_object * (*pgo_attach) /* get uvm_object */ + __P((void *, vm_prot_t)); + void (*pgo_reference) /* add reference to obj */ + __P((struct uvm_object *)); + void (*pgo_detach) /* drop reference to obj */ + __P((struct uvm_object *)); + int (*pgo_fault) /* special nonstd fault fn */ + __P((struct uvm_faultinfo *, vaddr_t, + vm_page_t *, int, int, vm_fault_t, + vm_prot_t, int)); + boolean_t (*pgo_flush) /* flush pages out of obj */ + __P((struct uvm_object *, vaddr_t, + vaddr_t, int)); + int (*pgo_get) /* get/read page */ + __P((struct uvm_object *, vaddr_t, + vm_page_t *, int *, int, vm_prot_t, int, int)); + int (*pgo_asyncget) /* start async get */ + __P((struct uvm_object *, vaddr_t, int)); + int (*pgo_put) /* put/write page */ + __P((struct uvm_object *, vm_page_t *, + int, boolean_t)); + void (*pgo_cluster) /* return range of cluster */ + __P((struct uvm_object *, vaddr_t, vaddr_t *, + vaddr_t *)); + struct vm_page ** (*pgo_mk_pcluster) /* make "put" cluster */ + __P((struct uvm_object *, struct vm_page **, + int *, struct vm_page *, int, vaddr_t, + vaddr_t)); + void (*pgo_shareprot) /* share protect */ + __P((vm_map_entry_t, vm_prot_t)); + void (*pgo_aiodone) /* async iodone */ + __P((struct uvm_aiodesc *)); + boolean_t (*pgo_releasepg) /* release page */ + __P((struct vm_page *, struct vm_page **)); +}; + +/* pager flags [mostly for flush] */ + +#define PGO_CLEANIT 0x001 /* write dirty pages to backing store */ +#define PGO_SYNCIO 0x002 /* if PGO_CLEAN: use sync I/O? */ +/* + * obviously if neither PGO_INVALIDATE or PGO_FREE are set then the pages + * stay where they are. 
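+ *
+ * (rough examples of how these combine: PGO_CLEANIT|PGO_SYNCIO writes
+ * dirty pages to backing store and waits for the i/o to finish;
+ * PGO_CLEANIT|PGO_FREE cleans and then frees the flushed pages;
+ * PGO_DEACTIVATE by itself just moves the flushed pages to the
+ * inactive queue.)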
+ */ +#define PGO_DEACTIVATE 0x004 /* deactivate flushed pages */ +#define PGO_FREE 0x008 /* free flushed pages */ + +#define PGO_ALLPAGES 0x010 /* flush whole object/get all pages */ +#define PGO_DOACTCLUST 0x020 /* flag to mk_pcluster to include active */ +#define PGO_LOCKED 0x040 /* fault data structures are locked [get] */ +#define PGO_PDFREECLUST 0x080 /* daemon's free cluster flag [uvm_pager_put] */ +#define PGO_REALLOCSWAP 0x100 /* reallocate swap area [pager_dropcluster] */ + +/* page we are not interested in getting */ +#define PGO_DONTCARE ((struct vm_page *) -1) /* [get only] */ + +/* + * handle inline options + */ + +#ifdef UVM_PAGER_INLINE +#define PAGER_INLINE static __inline +#else +#define PAGER_INLINE /* nothing */ +#endif /* UVM_PAGER_INLINE */ + +/* + * prototypes + */ + +void uvm_pager_dropcluster __P((struct uvm_object *, + struct vm_page *, struct vm_page **, + int *, int, int)); +void uvm_pager_init __P((void)); +int uvm_pager_put __P((struct uvm_object *, struct vm_page *, + struct vm_page ***, int *, int, + vaddr_t, vaddr_t)); + +PAGER_INLINE struct vm_page *uvm_pageratop __P((vaddr_t)); + +vaddr_t uvm_pagermapin __P((struct vm_page **, int, + struct uvm_aiodesc **, int)); +void uvm_pagermapout __P((vaddr_t, int)); +struct vm_page **uvm_mk_pcluster __P((struct uvm_object *, struct vm_page **, + int *, struct vm_page *, int, + vaddr_t, vaddr_t)); +void uvm_shareprot __P((vm_map_entry_t, vm_prot_t)); + + +#endif /* _UVM_UVM_PAGER_H_ */ diff --git a/sys/uvm/uvm_pager_i.h b/sys/uvm/uvm_pager_i.h new file mode 100644 index 00000000000..7e8e8675df7 --- /dev/null +++ b/sys/uvm/uvm_pager_i.h @@ -0,0 +1,73 @@ +/* $NetBSD: uvm_pager_i.h,v 1.6 1998/08/13 02:11:03 eeh Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * from: Id: uvm_pager_i.h,v 1.1.2.2 1997/10/09 23:05:46 chuck Exp + */ + +#ifndef _UVM_UVM_PAGER_I_H_ +#define _UVM_UVM_PAGER_I_H_ + +/* + * uvm_pager_i.h + */ + +/* + * inline functions [maybe] + */ + +#if defined(UVM_PAGER_INLINE) || defined(UVM_PAGER) + +/* + * uvm_pageratop: convert KVAs in the pager map back to their page + * structures. + */ + +PAGER_INLINE struct vm_page * +uvm_pageratop(kva) + vaddr_t kva; +{ + paddr_t pa; + + pa = pmap_extract(pmap_kernel(), kva); + if (pa == 0) + panic("uvm_pageratop"); + return (PHYS_TO_VM_PAGE(pa)); +} + +#endif /* defined(UVM_PAGER_INLINE) || defined(UVM_PAGER) */ + +#endif /* _UVM_UVM_PAGER_I_H_ */ diff --git a/sys/uvm/uvm_pdaemon.c b/sys/uvm/uvm_pdaemon.c new file mode 100644 index 00000000000..f1b0fcc327d --- /dev/null +++ b/sys/uvm/uvm_pdaemon.c @@ -0,0 +1,1012 @@ +/* $NetBSD: uvm_pdaemon.c,v 1.12 1998/11/04 07:06:05 chs Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * Copyright (c) 1991, 1993, The Regents of the University of California. + * + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor, + * Washington University, the University of California, Berkeley and + * its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)vm_pageout.c 8.5 (Berkeley) 2/14/94 + * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp + * + * + * Copyright (c) 1987, 1990 Carnegie-Mellon University. + * All rights reserved. + * + * Permission to use, copy, modify and distribute this software and + * its documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie the + * rights to redistribute these changes. + */ + +/* + * uvm_pdaemon.c: the page daemon + */ + +#include <sys/param.h> +#include <sys/proc.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/pool.h> + +#include <vm/vm.h> +#include <vm/vm_page.h> +#include <vm/vm_kern.h> + +#include <uvm/uvm.h> + +/* + * local prototypes + */ + +static void uvmpd_scan __P((void)); +static boolean_t uvmpd_scan_inactive __P((struct pglist *)); +static void uvmpd_tune __P((void)); + + +/* + * uvm_wait: wait (sleep) for the page daemon to free some pages + * + * => should be called with all locks released + * => should _not_ be called by the page daemon (to avoid deadlock) + */ + +void uvm_wait(wmsg) + char *wmsg; +{ + int timo = 0; + int s = splbio(); + + /* + * check for page daemon going to sleep (waiting for itself) + */ + + if (curproc == uvm.pagedaemon_proc) { + /* + * now we have a problem: the pagedaemon wants to go to + * sleep until it frees more memory. but how can it + * free more memory if it is asleep? that is a deadlock. + * we have two options: + * [1] panic now + * [2] put a timeout on the sleep, thus causing the + * pagedaemon to only pause (rather than sleep forever) + * + * note that option [2] will only help us if we get lucky + * and some other process on the system breaks the deadlock + * by exiting or freeing memory (thus allowing the pagedaemon + * to continue). for now we panic if DEBUG is defined, + * otherwise we hope for the best with option [2] (better + * yet, this should never happen in the first place!). + */ + + printf("pagedaemon: deadlock detected!\n"); + timo = hz >> 3; /* set timeout */ +#if defined(DEBUG) + /* DEBUG: panic so we can debug it */ + panic("pagedaemon deadlock"); +#endif + } + + simple_lock(&uvm.pagedaemon_lock); + thread_wakeup(&uvm.pagedaemon); /* wake the daemon! 
*/ + UVM_UNLOCK_AND_WAIT(&uvmexp.free, &uvm.pagedaemon_lock, FALSE, wmsg, + timo); + + splx(s); +} + + +/* + * uvmpd_tune: tune paging parameters + * + * => called when ever memory is added (or removed?) to the system + * => caller must call with page queues locked + */ + +static void +uvmpd_tune() +{ + UVMHIST_FUNC("uvmpd_tune"); UVMHIST_CALLED(pdhist); + + uvmexp.freemin = uvmexp.npages / 20; + + /* between 16k and 256k */ + /* XXX: what are these values good for? */ + uvmexp.freemin = max(uvmexp.freemin, (16*1024) >> PAGE_SHIFT); + uvmexp.freemin = min(uvmexp.freemin, (256*1024) >> PAGE_SHIFT); + + uvmexp.freetarg = (uvmexp.freemin * 4) / 3; + if (uvmexp.freetarg <= uvmexp.freemin) + uvmexp.freetarg = uvmexp.freemin + 1; + + /* uvmexp.inactarg: computed in main daemon loop */ + + uvmexp.wiredmax = uvmexp.npages / 3; + UVMHIST_LOG(pdhist, "<- done, freemin=%d, freetarg=%d, wiredmax=%d", + uvmexp.freemin, uvmexp.freetarg, uvmexp.wiredmax, 0); +} + +/* + * uvm_pageout: the main loop for the pagedaemon + */ + +void +uvm_pageout() +{ + int npages = 0; + int s; + struct uvm_aiodesc *aio, *nextaio; + UVMHIST_FUNC("uvm_pageout"); UVMHIST_CALLED(pdhist); + + UVMHIST_LOG(pdhist,"<starting uvm pagedaemon>", 0, 0, 0, 0); + + /* + * ensure correct priority and set paging parameters... + */ + + uvm.pagedaemon_proc = curproc; + (void) spl0(); + uvm_lock_pageq(); + npages = uvmexp.npages; + uvmpd_tune(); + uvm_unlock_pageq(); + + /* + * main loop + */ + while (TRUE) { + + /* + * carefully attempt to go to sleep (without losing "wakeups"!). + * we need splbio because we want to make sure the aio_done list + * is totally empty before we go to sleep. + */ + + s = splbio(); + simple_lock(&uvm.pagedaemon_lock); + + /* + * if we've got done aio's, then bypass the sleep + */ + + if (uvm.aio_done.tqh_first == NULL) { + UVMHIST_LOG(maphist," <<SLEEPING>>",0,0,0,0); + UVM_UNLOCK_AND_WAIT(&uvm.pagedaemon, + &uvm.pagedaemon_lock, FALSE, "daemon_slp", 0); + uvmexp.pdwoke++; + UVMHIST_LOG(pdhist," <<WOKE UP>>",0,0,0,0); + + /* relock pagedaemon_lock, still at splbio */ + simple_lock(&uvm.pagedaemon_lock); + } + + /* + * check for done aio structures + */ + + aio = uvm.aio_done.tqh_first; /* save current list (if any)*/ + if (aio) { + TAILQ_INIT(&uvm.aio_done); /* zero global list */ + } + + simple_unlock(&uvm.pagedaemon_lock); /* unlock */ + splx(s); /* drop splbio */ + + /* + * first clear out any pending aios (to free space in case we + * want to pageout more stuff). + */ + + for (/*null*/; aio != NULL ; aio = nextaio) { + + uvmexp.paging -= aio->npages; + nextaio = aio->aioq.tqe_next; + aio->aiodone(aio); + + } + + /* Next, drain pool resources */ + pool_drain(0); + + /* + * now lock page queues and recompute inactive count + */ + uvm_lock_pageq(); + + if (npages != uvmexp.npages) { /* check for new pages? */ + npages = uvmexp.npages; + uvmpd_tune(); + } + + uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3; + if (uvmexp.inactarg <= uvmexp.freetarg) + uvmexp.inactarg = uvmexp.freetarg + 1; + + UVMHIST_LOG(pdhist," free/ftarg=%d/%d, inact/itarg=%d/%d", + uvmexp.free, uvmexp.freetarg, uvmexp.inactive, + uvmexp.inactarg); + + /* + * scan if needed + * [XXX: note we are reading uvm.free without locking] + */ + if (uvmexp.free < uvmexp.freetarg || + uvmexp.inactive < uvmexp.inactarg) + uvmpd_scan(); + + /* + * done scan. unlock page queues (the only lock we are holding) + */ + uvm_unlock_pageq(); + + /* + * done! restart loop. 
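+		 *
+		 * (the wakeup below pairs with the
+		 * UVM_UNLOCK_AND_WAIT(&uvmexp.free, ...) in uvm_wait():
+		 * processes stalled waiting for free memory get to retry
+		 * now that we have finished a scan.)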
+ */ + thread_wakeup(&uvmexp.free); + } + /*NOTREACHED*/ +} + +/* + * uvmpd_scan_inactive: the first loop of uvmpd_scan broken out into + * its own function for ease of reading. + * + * => called with page queues locked + * => we work on meeting our free target by converting inactive pages + * into free pages. + * => we handle the building of swap-backed clusters + * => we return TRUE if we are exiting because we met our target + */ + +static boolean_t +uvmpd_scan_inactive(pglst) + struct pglist *pglst; +{ + boolean_t retval = FALSE; /* assume we haven't hit target */ + int s, free, result; + struct vm_page *p, *nextpg; + struct uvm_object *uobj; + struct vm_page *pps[MAXBSIZE >> PAGE_SHIFT], **ppsp; + int npages; + struct vm_page *swpps[MAXBSIZE >> PAGE_SHIFT]; /* XXX: see below */ + int swnpages, swcpages; /* XXX: see below */ + int swslot, oldslot; + struct vm_anon *anon; + boolean_t swap_backed; + vaddr_t start; + UVMHIST_FUNC("uvmpd_scan_inactive"); UVMHIST_CALLED(pdhist); + + /* + * note: we currently keep swap-backed pages on a seperate inactive + * list from object-backed pages. however, merging the two lists + * back together again hasn't been ruled out. thus, we keep our + * swap cluster in "swpps" rather than in pps (allows us to mix + * clustering types in the event of a mixed inactive queue). + */ + + /* + * swslot is non-zero if we are building a swap cluster. we want + * to stay in the loop while we have a page to scan or we have + * a swap-cluster to build. + */ + swslot = 0; + swnpages = swcpages = 0; + free = 0; + + for (p = pglst->tqh_first ; p != NULL || swslot != 0 ; p = nextpg) { + + /* + * note that p can be NULL iff we have traversed the whole + * list and need to do one final swap-backed clustered pageout. + */ + if (p) { + /* + * update our copy of "free" and see if we've met + * our target + */ + s = splimp(); + uvm_lock_fpageq(); + free = uvmexp.free; + uvm_unlock_fpageq(); + splx(s); + + if (free >= uvmexp.freetarg) { + UVMHIST_LOG(pdhist," met free target: " + "exit loop", 0, 0, 0, 0); + retval = TRUE; /* hit the target! */ + + if (swslot == 0) + /* exit now if no swap-i/o pending */ + break; + + /* set p to null to signal final swap i/o */ + p = NULL; + } + } + + uobj = NULL; /* be safe and shut gcc up */ + anon = NULL; /* be safe and shut gcc up */ + + if (p) { /* if (we have a new page to consider) */ + /* + * we are below target and have a new page to consider. + */ + uvmexp.pdscans++; + nextpg = p->pageq.tqe_next; + + /* + * move referenced pages back to active queue and + * skip to next page (unlikely to happen since + * inactive pages shouldn't have any valid mappings + * and we cleared reference before deactivating). + */ + if (pmap_is_referenced(PMAP_PGARG(p))) { + uvm_pageactivate(p); + uvmexp.pdreact++; + continue; + } + + /* + * first we attempt to lock the object that this page + * belongs to. if our attempt fails we skip on to + * the next page (no harm done). it is important to + * "try" locking the object as we are locking in the + * wrong order (pageq -> object) and we don't want to + * get deadlocked. + * + * the only time we exepct to see an ownerless page + * (i.e. a page with no uobject and !PQ_ANON) is if an + * anon has loaned a page from a uvm_object and the + * uvm_object has dropped the ownership. in that + * case, the anon can "take over" the loaned page + * and make it its own. + */ + + /* is page part of an anon or ownerless ? 
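 *
 * (a minimal sketch, not from the imported source; the helper name
 * is hypothetical) the ownership test just below boils down to:
 *
 *      static __inline boolean_t
 *      page_is_anon_or_ownerless(struct vm_page *pg)
 *      {
 *              return ((pg->pqflags & PQ_ANON) != 0 ||
 *                  pg->uobject == NULL);
 *      }
 *
 * i.e. either an anon already owns the page, or no uvm_object does
 * (the loaned-page case described above) and the anon may claim it.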
*/ + if ((p->pqflags & PQ_ANON) || p->uobject == NULL) { + + anon = p->uanon; + +#ifdef DIAGNOSTIC + /* to be on inactive q, page must be part + * of _something_ */ + if (anon == NULL) + panic("pagedaemon: page with no anon " + "or object detected - loop 1"); +#endif + + if (!simple_lock_try(&anon->an_lock)) + /* lock failed, skip this page */ + continue; + + /* + * if the page is ownerless, claim it in the + * name of "anon"! + */ + if ((p->pqflags & PQ_ANON) == 0) { +#ifdef DIAGNOSTIC + if (p->loan_count < 1) + panic("pagedaemon: non-loaned " + "ownerless page detected -" + " loop 1"); +#endif + p->loan_count--; + p->pqflags |= PQ_ANON; /* anon now owns it */ + } + + if (p->flags & PG_BUSY) { + simple_unlock(&anon->an_lock); + uvmexp.pdbusy++; + /* someone else owns page, skip it */ + continue; + } + + uvmexp.pdanscan++; + + } else { + + uobj = p->uobject; + + if (!simple_lock_try(&uobj->vmobjlock)) + /* lock failed, skip this page */ + continue; + + if (p->flags & PG_BUSY) { + simple_unlock(&uobj->vmobjlock); + uvmexp.pdbusy++; + /* someone else owns page, skip it */ + continue; + } + + uvmexp.pdobscan++; + } + + /* + * we now have the object and the page queues locked. + * the page is not busy. if the page is clean we + * can free it now and continue. + */ + + if (p->flags & PG_CLEAN) { + /* zap all mappings with pmap_page_protect... */ + pmap_page_protect(PMAP_PGARG(p), VM_PROT_NONE); + uvm_pagefree(p); + uvmexp.pdfreed++; + + if (anon) { +#ifdef DIAGNOSTIC + /* + * an anonymous page can only be clean + * if it has valid backing store. + */ + if (anon->an_swslot == 0) + panic("pagedaemon: clean anon " + "page without backing store?"); +#endif + /* remove from object */ + anon->u.an_page = NULL; + simple_unlock(&anon->an_lock); + } else { + /* pagefree has already removed the + * page from the object */ + simple_unlock(&uobj->vmobjlock); + } + continue; + } + + /* + * this page is dirty, skip it if we'll have met our + * free target when all the current pageouts complete. + */ + if (free + uvmexp.paging > uvmexp.freetarg) + { + if (anon) { + simple_unlock(&anon->an_lock); + } else { + simple_unlock(&uobj->vmobjlock); + } + continue; + } + + /* + * the page we are looking at is dirty. we must + * clean it before it can be freed. to do this we + * first mark the page busy so that no one else will + * touch the page. we write protect all the mappings + * of the page so that no one touches it while it is + * in I/O. + */ + + swap_backed = ((p->pqflags & PQ_SWAPBACKED) != 0); + p->flags |= PG_BUSY; /* now we own it */ + UVM_PAGE_OWN(p, "scan_inactive"); + pmap_page_protect(PMAP_PGARG(p), VM_PROT_READ); + uvmexp.pgswapout++; + + /* + * for swap-backed pages we need to (re)allocate + * swap space. + */ + if (swap_backed) { + + /* + * free old swap slot (if any) + */ + if (anon) { + if (anon->an_swslot) { + uvm_swap_free(anon->an_swslot, + 1); + anon->an_swslot = 0; + } + } else { + oldslot = uao_set_swslot(uobj, + p->offset >> PAGE_SHIFT, 0); + + if (oldslot) + uvm_swap_free(oldslot, 1); + } + + /* + * start new cluster (if necessary) + */ + if (swslot == 0) { + /* want this much */ + swnpages = MAXBSIZE >> PAGE_SHIFT; + + swslot = uvm_swap_alloc(&swnpages, + TRUE); + + if (swslot == 0) { + /* no swap? give up! 
*/ + p->flags &= ~PG_BUSY; + UVM_PAGE_OWN(p, NULL); + if (anon) + simple_unlock( + &anon->an_lock); + else + simple_unlock( + &uobj->vmobjlock); + continue; + } + swcpages = 0; /* cluster is empty */ + } + + /* + * add block to cluster + */ + swpps[swcpages] = p; + uvmexp.pgswapout++; + if (anon) + anon->an_swslot = swslot + swcpages; + else + uao_set_swslot(uobj, + p->offset >> PAGE_SHIFT, + swslot + swcpages); + swcpages++; + + /* done (swap-backed) */ + } + + /* end: if (p) ["if we have new page to consider"] */ + } else { + + /* if p == NULL we must be doing a last swap i/o */ + swap_backed = TRUE; + } + + /* + * now consider doing the pageout. + * + * for swap-backed pages, we do the pageout if we have either + * filled the cluster (in which case (swnpages == swcpages) or + * run out of pages (p == NULL). + * + * for object pages, we always do the pageout. + */ + if (swap_backed) { + + if (p) { /* if we just added a page to cluster */ + if (anon) + simple_unlock(&anon->an_lock); + else + simple_unlock(&uobj->vmobjlock); + + /* cluster not full yet? */ + if (swcpages < swnpages) + continue; + } + + /* starting I/O now... set up for it */ + npages = swcpages; + ppsp = swpps; + /* for swap-backed pages only */ + start = (vaddr_t) swslot; + + /* if this is final pageout we could have a few + * extra swap blocks */ + if (swcpages < swnpages) { + uvm_swap_free(swslot + swcpages, + (swnpages - swcpages)); + } + + } else { + + /* normal object pageout */ + ppsp = pps; + npages = sizeof(pps) / sizeof(struct vm_page *); + /* not looked at because PGO_ALLPAGES is set */ + start = 0; + + } + + /* + * now do the pageout. + * + * for swap_backed pages we have already built the cluster. + * for !swap_backed pages, uvm_pager_put will call the object's + * "make put cluster" function to build a cluster on our behalf. + * + * we pass the PGO_PDFREECLUST flag to uvm_pager_put to instruct + * it to free the cluster pages for us on a successful I/O (it + * always does this for un-successful I/O requests). this + * allows us to do clustered pageout without having to deal + * with cluster pages at this level. + * + * note locking semantics of uvm_pager_put with PGO_PDFREECLUST: + * IN: locked: uobj (if !swap_backed), page queues + * OUT: locked: uobj (if !swap_backed && result !=VM_PAGER_PEND) + * !locked: pageqs, uobj (if swap_backed || VM_PAGER_PEND) + * + * [the bit about VM_PAGER_PEND saves us one lock-unlock pair] + */ + + /* locked: uobj (if !swap_backed), page queues */ + uvmexp.pdpageouts++; + result = uvm_pager_put((swap_backed) ? NULL : uobj, p, + &ppsp, &npages, PGO_ALLPAGES|PGO_PDFREECLUST, start, 0); + /* locked: uobj (if !swap_backed && result != PEND) */ + /* unlocked: pageqs, object (if swap_backed ||result == PEND) */ + + /* + * if we did i/o to swap, zero swslot to indicate that we are + * no longer building a swap-backed cluster. + */ + + if (swap_backed) + swslot = 0; /* done with this cluster */ + + /* + * first, we check for VM_PAGER_PEND which means that the + * async I/O is in progress and the async I/O done routine + * will clean up after us. in this case we move on to the + * next page. + * + * there is a very remote chance that the pending async i/o can + * finish _before_ we get here. if that happens, our page "p" + * may no longer be on the inactive queue. so we verify this + * when determining the next page (starting over at the head if + * we've lost our inactive page). 
+ */ + + if (result == VM_PAGER_PEND) { + uvmexp.paging += npages; + uvm_lock_pageq(); /* relock page queues */ + uvmexp.pdpending++; + if (p) { + if (p->pqflags & PQ_INACTIVE) + /* reload! */ + nextpg = p->pageq.tqe_next; + else + /* reload! */ + nextpg = pglst->tqh_first; + } else { + nextpg = NULL; /* done list */ + } + continue; + } + + /* + * clean up "p" if we have one + */ + + if (p) { + /* + * the I/O request to "p" is done and uvm_pager_put + * has freed any cluster pages it may have allocated + * during I/O. all that is left for us to do is + * clean up page "p" (which is still PG_BUSY). + * + * our result could be one of the following: + * VM_PAGER_OK: successful pageout + * + * VM_PAGER_AGAIN: tmp resource shortage, we skip + * to next page + * VM_PAGER_{FAIL,ERROR,BAD}: an error. we + * "reactivate" page to get it out of the way (it + * will eventually drift back into the inactive + * queue for a retry). + * VM_PAGER_UNLOCK: should never see this as it is + * only valid for "get" operations + */ + + /* relock p's object: page queues not lock yet, so + * no need for "try" */ + + /* !swap_backed case: already locked... */ + if (swap_backed) { + if (anon) + simple_lock(&anon->an_lock); + else + simple_lock(&uobj->vmobjlock); + } + +#ifdef DIAGNOSTIC + if (result == VM_PAGER_UNLOCK) + panic("pagedaemon: pageout returned " + "invalid 'unlock' code"); +#endif + + /* handle PG_WANTED now */ + if (p->flags & PG_WANTED) + /* still holding object lock */ + thread_wakeup(p); + + p->flags &= ~(PG_BUSY|PG_WANTED); + UVM_PAGE_OWN(p, NULL); + + /* released during I/O? */ + if (p->flags & PG_RELEASED) { + if (anon) { + /* remove page so we can get nextpg */ + anon->u.an_page = NULL; + + simple_unlock(&anon->an_lock); + uvm_anfree(anon); /* kills anon */ + pmap_page_protect(PMAP_PGARG(p), + VM_PROT_NONE); + anon = NULL; + uvm_lock_pageq(); + nextpg = p->pageq.tqe_next; + /* free released page */ + uvm_pagefree(p); + + } else { + +#ifdef DIAGNOSTIC + if (uobj->pgops->pgo_releasepg == NULL) + panic("pagedaemon: no " + "pgo_releasepg function"); +#endif + + /* + * pgo_releasepg nukes the page and + * gets "nextpg" for us. it returns + * with the page queues locked (when + * given nextpg ptr). + */ + if (!uobj->pgops->pgo_releasepg(p, + &nextpg)) + /* uobj died after release */ + uobj = NULL; + + /* + * lock page queues here so that they're + * always locked at the end of the loop. + */ + uvm_lock_pageq(); + } + + } else { /* page was not released during I/O */ + + uvm_lock_pageq(); + nextpg = p->pageq.tqe_next; + + if (result != VM_PAGER_OK) { + + /* pageout was a failure... */ + if (result != VM_PAGER_AGAIN) + uvm_pageactivate(p); + pmap_clear_reference(PMAP_PGARG(p)); + /* XXXCDC: if (swap_backed) FREE p's + * swap block? */ + + } else { + + /* pageout was a success... */ + pmap_clear_reference(PMAP_PGARG(p)); + pmap_clear_modify(PMAP_PGARG(p)); + p->flags |= PG_CLEAN; + /* XXX: could free page here, but old + * pagedaemon does not */ + + } + } + + /* + * drop object lock (if there is an object left). do + * a safety check of nextpg to make sure it is on the + * inactive queue (it should be since PG_BUSY pages on + * the inactive queue can't be re-queued [note: not + * true for active queue]). 
+ */ + + if (anon) + simple_unlock(&anon->an_lock); + else if (uobj) + simple_unlock(&uobj->vmobjlock); + + } /* if (p) */ else { + + /* if p is null in this loop, make sure it stays null + * in next loop */ + nextpg = NULL; + + /* + * lock page queues here just so they're always locked + * at the end of the loop. + */ + uvm_lock_pageq(); + } + + if (nextpg && (nextpg->pqflags & PQ_INACTIVE) == 0) { + printf("pagedaemon: invalid nextpg! reverting to " + "queue head\n"); + nextpg = pglst->tqh_first; /* reload! */ + } + + } /* end of "inactive" 'for' loop */ + return (retval); +} + +/* + * uvmpd_scan: scan the page queues and attempt to meet our targets. + * + * => called with pageq's locked + */ + +void +uvmpd_scan() +{ + int s, free, pages_freed, page_shortage; + struct vm_page *p, *nextpg; + struct uvm_object *uobj; + boolean_t got_it; + UVMHIST_FUNC("uvmpd_scan"); UVMHIST_CALLED(pdhist); + + uvmexp.pdrevs++; /* counter */ + +#ifdef __GNUC__ + uobj = NULL; /* XXX gcc */ +#endif + /* + * get current "free" page count + */ + s = splimp(); + uvm_lock_fpageq(); + free = uvmexp.free; + uvm_unlock_fpageq(); + splx(s); + +#ifndef __SWAP_BROKEN + /* + * swap out some processes if we are below our free target. + * we need to unlock the page queues for this. + */ + if (free < uvmexp.freetarg) { + + uvmexp.pdswout++; + UVMHIST_LOG(pdhist," free %d < target %d: swapout", free, + uvmexp.freetarg, 0, 0); + uvm_unlock_pageq(); + uvm_swapout_threads(); + pmap_update(); /* update so we can scan inactive q */ + uvm_lock_pageq(); + + } +#endif + + /* + * now we want to work on meeting our targets. first we work on our + * free target by converting inactive pages into free pages. then + * we work on meeting our inactive target by converting active pages + * to inactive ones. + */ + + UVMHIST_LOG(pdhist, " starting 'free' loop",0,0,0,0); + pages_freed = uvmexp.pdfreed; /* so far... */ + + /* + * do loop #1! alternate starting queue between swap and object based + * on the low bit of uvmexp.pdrevs (which we bump by one each call). + */ + + got_it = FALSE; + if ((uvmexp.pdrevs & 1) != 0 && uvmexp.nswapdev != 0) + got_it = uvmpd_scan_inactive(&uvm.page_inactive_swp); + if (!got_it) + got_it = uvmpd_scan_inactive(&uvm.page_inactive_obj); + if (!got_it && (uvmexp.pdrevs & 1) == 0 && uvmexp.nswapdev != 0) + (void) uvmpd_scan_inactive(&uvm.page_inactive_swp); + + /* + * we have done the scan to get free pages. now we work on meeting + * our inactive target. + */ + + page_shortage = uvmexp.inactarg - uvmexp.inactive; + pages_freed = uvmexp.pdfreed - pages_freed; /* # pages freed in loop */ + if (page_shortage <= 0 && pages_freed == 0) + page_shortage = 1; + + UVMHIST_LOG(pdhist, " second loop: page_shortage=%d", page_shortage, + 0, 0, 0); + for (p = uvm.page_active.tqh_first ; + p != NULL && page_shortage > 0 ; p = nextpg) { + nextpg = p->pageq.tqe_next; + if (p->flags & PG_BUSY) + continue; /* quick check before trying to lock */ + + /* + * lock owner + */ + /* is page anon owned or ownerless? */ + if ((p->pqflags & PQ_ANON) || p->uobject == NULL) { + +#ifdef DIAGNOSTIC + if (p->uanon == NULL) + panic("pagedaemon: page with no anon or " + "object detected - loop 2"); +#endif + + if (!simple_lock_try(&p->uanon->an_lock)) + continue; + + /* take over the page? 
*/ + if ((p->pqflags & PQ_ANON) == 0) { + +#ifdef DIAGNOSTIC + if (p->loan_count < 1) + panic("pagedaemon: non-loaned " + "ownerless page detected - loop 2"); +#endif + + p->loan_count--; + p->pqflags |= PQ_ANON; + } + + } else { + + if (!simple_lock_try(&p->uobject->vmobjlock)) + continue; + + } + + if ((p->flags & PG_BUSY) == 0) { + pmap_page_protect(PMAP_PGARG(p), VM_PROT_NONE); + /* no need to check wire_count as pg is "active" */ + uvm_pagedeactivate(p); + uvmexp.pddeact++; + page_shortage--; + } + + if (p->pqflags & PQ_ANON) + simple_unlock(&p->uanon->an_lock); + else + simple_unlock(&p->uobject->vmobjlock); + } + + /* + * done scan + */ +} diff --git a/sys/uvm/uvm_pdaemon.h b/sys/uvm/uvm_pdaemon.h new file mode 100644 index 00000000000..c5aad80ef81 --- /dev/null +++ b/sys/uvm/uvm_pdaemon.h @@ -0,0 +1,86 @@ +/* $NetBSD: uvm_pdaemon.h,v 1.5 1998/02/10 14:12:28 mrg Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * Copyright (c) 1991, 1993, The Regents of the University of California. + * + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor, + * Washington University, the University of California, Berkeley and + * its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)vm_pageout.h 8.2 (Berkeley) 1/12/94 + * from: Id: uvm_pdaemon.h,v 1.1.2.4 1998/02/02 20:07:20 chuck Exp + * + * + * Copyright (c) 1987, 1990 Carnegie-Mellon University. + * All rights reserved. 
+ * + * Permission to use, copy, modify and distribute this software and + * its documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie the + * rights to redistribute these changes. + */ + +#ifndef _UVM_UVM_PDAEMON_H_ +#define _UVM_UVM_PDAEMON_H_ + +/* + * uvm_pdaemon.h: page daemon hooks + */ + +/* + * prototypes + */ + +void uvm_wait __P((char *)); + +#endif /* _UVM_UVM_PDAEMON_H_ */ diff --git a/sys/uvm/uvm_pglist.c b/sys/uvm/uvm_pglist.c new file mode 100644 index 00000000000..d7fe645ebeb --- /dev/null +++ b/sys/uvm/uvm_pglist.c @@ -0,0 +1,292 @@ +/* $NetBSD: uvm_pglist.c,v 1.6 1998/08/13 02:11:03 eeh Exp $ */ + +#define VM_PAGE_ALLOC_MEMORY_STATS + +/*- + * Copyright (c) 1997 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, + * NASA Ames Research Center. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* + * uvm_pglist.c: pglist functions + * + * XXX: was part of uvm_page but has an incompatable copyright so it + * gets its own file now. + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/malloc.h> +#include <sys/proc.h> + +#include <vm/vm.h> +#include <vm/vm_page.h> +#include <vm/vm_kern.h> + +#include <uvm/uvm.h> + +#ifdef VM_PAGE_ALLOC_MEMORY_STATS +#define STAT_INCR(v) (v)++ +#define STAT_DECR(v) do { \ + if ((v) == 0) \ + printf("%s:%d -- Already 0!\n", __FILE__, __LINE__); \ + else \ + (v)--; \ + } while (0) +u_long uvm_pglistalloc_npages; +#else +#define STAT_INCR(v) +#define STAT_DECR(v) +#endif + +/* + * uvm_pglistalloc: allocate a list of pages + * + * => allocated pages are placed at the tail of rlist. rlist is + * assumed to be properly initialized by caller. + * => returns 0 on success or errno on failure + * => XXX: implementation allocates only a single segment, also + * might be able to better advantage of vm_physeg[]. + * => doesn't take into account clean non-busy pages on inactive list + * that could be used(?) + * => params: + * size the size of the allocation, rounded to page size. + * low the low address of the allowed allocation range. + * high the high address of the allowed allocation range. + * alignment memory must be aligned to this power-of-two boundary. + * boundary no segment in the allocation may cross this + * power-of-two boundary (relative to zero). + */ + +int +uvm_pglistalloc(size, low, high, alignment, boundary, rlist, nsegs, waitok) + psize_t size; + paddr_t low, high, alignment, boundary; + struct pglist *rlist; + int nsegs, waitok; +{ + paddr_t try, idxpa, lastidxpa; + int psi; + struct vm_page *pgs; + int s, tryidx, idx, end, error, free_list; + vm_page_t m; + u_long pagemask; +#ifdef DEBUG + vm_page_t tp; +#endif + +#ifdef DIAGNOSTIC + if ((alignment & (alignment - 1)) != 0) + panic("vm_page_alloc_memory: alignment must be power of 2"); + + if ((boundary & (boundary - 1)) != 0) + panic("vm_page_alloc_memory: boundary must be power of 2"); +#endif + + /* + * Our allocations are always page granularity, so our alignment + * must be, too. + */ + if (alignment < PAGE_SIZE) + alignment = PAGE_SIZE; + + size = round_page(size); + try = roundup(low, alignment); + + if (boundary != 0 && boundary < size) + return (EINVAL); + + pagemask = ~(boundary - 1); + + /* Default to "lose". */ + error = ENOMEM; + + /* + * Block all memory allocation and lock the free list. + */ + s = splimp(); + uvm_lock_fpageq(); /* lock free page queue */ + + /* Are there even any free pages? */ + for (idx = 0; idx < VM_NFREELIST; idx++) + if (uvm.page_free[idx].tqh_first != NULL) + break; + if (idx == VM_NFREELIST) + goto out; + + for (;; try += alignment) { + if (try + size > high) { + /* + * We've run past the allowable range. + */ + goto out; + } + + /* + * Make sure this is a managed physical page. + */ + + if ((psi = vm_physseg_find(atop(try), &idx)) == -1) + continue; /* managed? */ + if (vm_physseg_find(atop(try + size), NULL) != psi) + continue; /* end must be in this segment */ + + tryidx = idx; + end = idx + (size / PAGE_SIZE); + pgs = vm_physmem[psi].pgs; + + /* + * Found a suitable starting page. See of the range is free. + */ + for (; idx < end; idx++) { + if (VM_PAGE_IS_FREE(&pgs[idx]) == 0) { + /* + * Page not available. + */ + break; + } + + idxpa = VM_PAGE_TO_PHYS(&pgs[idx]); + + if (idx > tryidx) { + lastidxpa = VM_PAGE_TO_PHYS(&pgs[idx - 1]); + + if ((lastidxpa + PAGE_SIZE) != idxpa) { + /* + * Region not contiguous. 
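 *
 * (worked example, not from the imported source; it assumes
 * PAGE_SIZE == 0x1000) with lastidxpa == 0x0fff000, the run only
 * continues if idxpa == 0x1000000; any other value leaves a hole
 * and we give up on this starting point.
 *
 * even a contiguous pair can still fail the boundary test just
 * below: with a requested boundary of 0x1000000 (16MB, so
 * pagemask == ~0xffffff),
 *
 *      (0x0fff000 ^ 0x1000000) & pagemask == 0x1000000 != 0
 *
 * so that pair straddles a 16MB boundary and is rejected there.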
+ */ + break; + } + if (boundary != 0 && + ((lastidxpa ^ idxpa) & pagemask) != 0) { + /* + * Region crosses boundary. + */ + break; + } + } + } + + if (idx == end) { + /* + * Woo hoo! Found one. + */ + break; + } + } + + /* + * we have a chunk of memory that conforms to the requested constraints. + */ + idx = tryidx; + while (idx < end) { + m = &pgs[idx]; + free_list = uvm_page_lookup_freelist(m); +#ifdef DEBUG + for (tp = uvm.page_free[free_list].tqh_first; + tp != NULL; tp = tp->pageq.tqe_next) { + if (tp == m) + break; + } + if (tp == NULL) + panic("uvm_pglistalloc: page not on freelist"); +#endif + TAILQ_REMOVE(&uvm.page_free[free_list], m, pageq); + uvmexp.free--; + m->flags = PG_CLEAN; + m->pqflags = 0; + m->uobject = NULL; + m->uanon = NULL; + m->wire_count = 0; + m->loan_count = 0; + TAILQ_INSERT_TAIL(rlist, m, pageq); + idx++; + STAT_INCR(uvm_pglistalloc_npages); + } + error = 0; + +out: + uvm_unlock_fpageq(); + splx(s); + + /* + * check to see if we need to generate some free pages waking + * the pagedaemon. + * XXX: we read uvm.free without locking + */ + + if (uvmexp.free < uvmexp.freemin || + (uvmexp.free < uvmexp.freetarg && + uvmexp.inactive < uvmexp.inactarg)) + thread_wakeup(&uvm.pagedaemon); + + return (error); +} + +/* + * uvm_pglistfree: free a list of pages + * + * => pages should already be unmapped + */ + +void +uvm_pglistfree(list) + struct pglist *list; +{ + vm_page_t m; + int s; + + /* + * Block all memory allocation and lock the free list. + */ + s = splimp(); + uvm_lock_fpageq(); + + while ((m = list->tqh_first) != NULL) { +#ifdef DIAGNOSTIC + if (m->pqflags & (PQ_ACTIVE|PQ_INACTIVE)) + panic("uvm_pglistfree: active/inactive page!"); +#endif + TAILQ_REMOVE(list, m, pageq); + m->pqflags = PQ_FREE; + TAILQ_INSERT_TAIL(&uvm.page_free[uvm_page_lookup_freelist(m)], + m, pageq); + uvmexp.free++; + STAT_DECR(uvm_pglistalloc_npages); + } + + uvm_unlock_fpageq(); + splx(s); +} diff --git a/sys/uvm/uvm_stat.c b/sys/uvm/uvm_stat.c new file mode 100644 index 00000000000..fbe3139c116 --- /dev/null +++ b/sys/uvm/uvm_stat.c @@ -0,0 +1,253 @@ +/* $NetBSD: uvm_stat.c,v 1.10 1998/06/20 13:19:00 mrg Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * from: Id: uvm_stat.c,v 1.1.2.3 1997/12/19 15:01:00 mrg Exp + */ + +/* + * uvm_stat.c + */ + +#include <sys/param.h> +#include <sys/systm.h> + +#include <vm/vm.h> + +#include <uvm/uvm.h> + +/* + * globals + */ + +struct uvm_cnt *uvm_cnt_head = NULL; + +#ifdef UVMHIST +struct uvm_history_head uvm_histories; +#endif + +#ifdef UVMHIST_PRINT +int uvmhist_print_enabled = 1; +#endif + +/* + * prototypes + */ + +#ifdef UVMHIST +void uvmhist_dump __P((struct uvm_history *)); +void uvm_hist __P((u_int32_t)); +static void uvmhist_dump_histories __P((struct uvm_history *[])); +#endif +void uvmcnt_dump __P((void)); +void uvm_dump __P((void)); + + +#ifdef UVMHIST +/* call this from ddb */ +void +uvmhist_dump(l) + struct uvm_history *l; +{ + int lcv, s; + + s = splhigh(); + lcv = l->f; + do { + if (l->e[lcv].fmt) + uvmhist_print(&l->e[lcv]); + lcv = (lcv + 1) % l->n; + } while (lcv != l->f); + splx(s); +} + +/* + * print a merged list of uvm_history structures + */ +static void +uvmhist_dump_histories(hists) + struct uvm_history *hists[]; +{ + struct timeval tv; + int cur[MAXHISTS]; + int s, lcv, hi; + + /* so we don't get corrupted lists! */ + s = splhigh(); + + /* find the first of each list */ + for (lcv = 0; hists[lcv]; lcv++) + cur[lcv] = hists[lcv]->f; + + /* + * here we loop "forever", finding the next earliest + * history entry and printing it. cur[X] is the current + * entry to test for the history in hists[X]. if it is + * -1, then this history is finished. + */ + for (;;) { + hi = -1; + tv.tv_sec = tv.tv_usec = 0; + + /* loop over each history */ + for (lcv = 0; hists[lcv]; lcv++) { +restart: + if (cur[lcv] == -1) + continue; + + /* + * if the format is empty, go to the next entry + * and retry. + */ + if (hists[lcv]->e[cur[lcv]].fmt == NULL) { + cur[lcv] = (cur[lcv] + 1) % (hists[lcv]->n); + if (cur[lcv] == hists[lcv]->f) + cur[lcv] = -1; + goto restart; + } + + /* + * if the time hasn't been set yet, or this entry is + * earlier than the current tv, set the time and history + * index. + */ + if (tv.tv_sec == 0 || + timercmp(&hists[lcv]->e[cur[lcv]].tv, &tv, <)) { + tv = hists[lcv]->e[cur[lcv]].tv; + hi = lcv; + } + } + + /* if we didn't find any entries, we must be done */ + if (hi == -1) + break; + + /* print and move to the next entry */ + uvmhist_print(&hists[hi]->e[cur[hi]]); + cur[hi] = (cur[hi] + 1) % (hists[hi]->n); + if (cur[hi] == hists[hi]->f) + cur[hi] = -1; + } + + /* done! */ + splx(s); +} + +/* + * call this from ddb. `bitmask' is from <uvm/uvm_stat.h>. it + * merges the named histories. 
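 *
 * (usage sketch, not from the imported source) to merge both of the
 * histories defined so far from the ddb prompt:
 *
 *      uvm_hist(UVMHIST_MAPHIST | UVMHIST_PDHIST);
 *
 * a zero bitmask selects every history as well, since each test
 * below treats bitmask == 0 as a match.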
+ */ +void +uvm_hist(bitmask) + u_int32_t bitmask; /* XXX only support 32 hists */ +{ + struct uvm_history *hists[MAXHISTS + 1]; + int i = 0; + + if ((bitmask & UVMHIST_MAPHIST) || bitmask == 0) + hists[i++] = &maphist; + + if ((bitmask & UVMHIST_PDHIST) || bitmask == 0) + hists[i++] = &pdhist; + + hists[i] = NULL; + + uvmhist_dump_histories(hists); +} +#endif /* UVMHIST */ + +void +uvmcnt_dump() +{ + struct uvm_cnt *uvc = uvm_cnt_head; + + while (uvc) { + if ((uvc->t & UVMCNT_MASK) != UVMCNT_CNT) + continue; + printf("%s = %d\n", uvc->name, uvc->c); + uvc = uvc->next; + } +} + +/* + * uvm_dump: ddb hook to dump interesting uvm counters + */ +void +uvm_dump() +{ + + printf("Current UVM status:\n"); + printf(" pagesize=%d (0x%x), pagemask=0x%x, pageshift=%d\n", + uvmexp.pagesize, uvmexp.pagesize, uvmexp.pagemask, + uvmexp.pageshift); + printf(" %d VM pages: %d active, %d inactive, %d wired, %d free\n", + uvmexp.npages, uvmexp.active, uvmexp.inactive, uvmexp.wired, + uvmexp.free); + printf(" freemin=%d, free-target=%d, inactive-target=%d, " + "wired-max=%d\n", uvmexp.freemin, uvmexp.freetarg, uvmexp.inactarg, + uvmexp.wiredmax); + printf(" faults=%d, traps=%d, intrs=%d, ctxswitch=%d\n", + uvmexp.faults, uvmexp.traps, uvmexp.intrs, uvmexp.swtch); + printf(" softint=%d, syscalls=%d, swapins=%d, swapouts=%d\n", + uvmexp.softs, uvmexp.syscalls, uvmexp.swapins, uvmexp.swapouts); + + printf(" fault counts:\n"); + printf(" noram=%d, noanon=%d, pgwait=%d, pgrele=%d\n", + uvmexp.fltnoram, uvmexp.fltnoanon, uvmexp.fltpgwait, + uvmexp.fltpgrele); + printf(" ok relocks(total)=%d(%d), anget(retrys)=%d(%d), " + "amapcopy=%d\n", uvmexp.fltrelckok, uvmexp.fltrelck, + uvmexp.fltanget, uvmexp.fltanretry, uvmexp.fltamcopy); + printf(" neighbor anon/obj pg=%d/%d, gets(lock/unlock)=%d/%d\n", + uvmexp.fltnamap, uvmexp.fltnomap, uvmexp.fltlget, uvmexp.fltget); + printf(" cases: anon=%d, anoncow=%d, obj=%d, prcopy=%d, przero=%d\n", + uvmexp.flt_anon, uvmexp.flt_acow, uvmexp.flt_obj, uvmexp.flt_prcopy, + uvmexp.flt_przero); + + printf(" daemon and swap counts:\n"); + printf(" woke=%d, revs=%d, scans=%d, swout=%d\n", uvmexp.pdwoke, + uvmexp.pdrevs, uvmexp.pdscans, uvmexp.pdswout); + printf(" busy=%d, freed=%d, reactivate=%d, deactivate=%d\n", + uvmexp.pdbusy, uvmexp.pdfreed, uvmexp.pdreact, uvmexp.pddeact); + printf(" pageouts=%d, pending=%d, nswget=%d\n", uvmexp.pdpageouts, + uvmexp.pdpending, uvmexp.nswget); + printf(" nswapdev=%d, nanon=%d, nfreeanon=%d\n", uvmexp.nswapdev, + uvmexp.nanon, uvmexp.nfreeanon); + + printf(" kernel pointers:\n"); + printf(" objs(kern/kmem/mb)=%p/%p/%p\n", uvm.kernel_object, + uvmexp.kmem_object, uvmexp.mb_object); +} diff --git a/sys/uvm/uvm_stat.h b/sys/uvm/uvm_stat.h new file mode 100644 index 00000000000..62ce32fe46e --- /dev/null +++ b/sys/uvm/uvm_stat.h @@ -0,0 +1,245 @@ +/* $NetBSD: uvm_stat.h,v 1.13 1998/08/09 22:36:39 perry Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * from: Id: uvm_stat.h,v 1.1.2.4 1998/02/07 01:16:56 chs Exp + */ + +#ifndef _UVM_UVM_STAT_H_ +#define _UVM_UVM_STAT_H_ + +#include <sys/queue.h> + +/* + * uvm_stat: monitor what is going on with uvm (or whatever) + */ + +/* + * counters [XXX: maybe replace event counters with this] + */ + +#define UVMCNT_MASK 0xf /* rest are private */ +#define UVMCNT_CNT 0 /* normal counter */ +#define UVMCNT_DEV 1 /* device event counter */ + +struct uvm_cnt { + int c; /* the value */ + int t; /* type */ + struct uvm_cnt *next; /* global list of cnts */ + char *name; /* counter name */ + void *p; /* private data */ +}; + +extern struct uvm_cnt *uvm_cnt_head; + +/* + * counter operations. assume spl is set ok. + */ + +#define UVMCNT_INIT(CNT,TYP,VAL,NAM,PRIV) \ +do { \ + CNT.c = VAL; \ + CNT.t = TYP; \ + CNT.next = uvm_cnt_head; \ + uvm_cnt_head = &CNT; \ + CNT.name = NAM; \ + CNT.p = PRIV; \ +} while (0) + +#define UVMCNT_SET(C,V) \ +do { \ + (C).c = (V); \ +} while (0) + +#define UVMCNT_ADD(C,V) \ +do { \ + (C).c += (V); \ +} while (0) + +#define UVMCNT_INCR(C) UVMCNT_ADD(C,1) +#define UVMCNT_DECR(C) UVMCNT_ADD(C,-1) + + +/* + * history/tracing + */ + +struct uvm_history_ent { + struct timeval tv; /* time stamp */ + char *fmt; /* printf format */ + size_t fmtlen; /* length of printf format */ + char *fn; /* function name */ + size_t fnlen; /* length of function name */ + u_long call; /* function call number */ + u_long v[4]; /* values */ +}; + +struct uvm_history { + const char *name; /* name of this this history */ + size_t namelen; /* length of name, not including null */ + LIST_ENTRY(uvm_history) list; /* link on list of all histories */ + int n; /* number of entries */ + int f; /* next free one */ + simple_lock_data_t l; /* lock on this history */ + struct uvm_history_ent *e; /* the malloc'd entries */ +}; + +LIST_HEAD(uvm_history_head, uvm_history); + +/* + * grovelling lists all at once. we currently do not allow more than + * 32 histories to exist, as the way to dump a number of them at once + * is by calling uvm_hist() with a bitmask. + */ + +/* this is used to set the size of some arrays */ +#define MAXHISTS 32 /* do not change this! 
*/ + +/* and these are the bit values of each history */ +#define UVMHIST_MAPHIST 0x00000001 /* maphist */ +#define UVMHIST_PDHIST 0x00000002 /* pdhist */ + +/* + * macros to use the history/tracing code. note that UVMHIST_LOG + * must take 4 arguments (even if they are ignored by the format). + */ +#ifndef UVMHIST +#define UVMHIST_DECL(NAME) +#define UVMHIST_INIT(NAME,N) +#define UVMHIST_INIT_STATIC(NAME,BUF) +#define UVMHIST_LOG(NAME,FMT,A,B,C,D) +#define UVMHIST_CALLED(NAME) +#define UVMHIST_FUNC(FNAME) +#define uvmhist_dump(NAME) +#else +extern struct uvm_history_head uvm_histories; + +#define UVMHIST_DECL(NAME) struct uvm_history NAME + +#define UVMHIST_INIT(NAME,N) \ +do { \ + (NAME).name = __STRING(NAME); \ + (NAME).namelen = strlen((NAME).name); \ + (NAME).n = (N); \ + (NAME).f = 0; \ + simple_lock_init(&(NAME).l); \ + (NAME).e = (struct uvm_history_ent *) \ + malloc(sizeof(struct uvm_history_ent) * (N), M_TEMP, \ + M_WAITOK); \ + bzero((NAME).e, sizeof(struct uvm_history_ent) * (N)); \ + LIST_INSERT_HEAD(&uvm_histories, &(NAME), list); \ +} while (0) + +#define UVMHIST_INIT_STATIC(NAME,BUF) \ +do { \ + (NAME).name = __STRING(NAME); \ + (NAME).namelen = strlen((NAME).name); \ + (NAME).n = sizeof(BUF) / sizeof(struct uvm_history_ent); \ + (NAME).f = 0; \ + simple_lock_init(&(NAME).l); \ + (NAME).e = (struct uvm_history_ent *) (BUF); \ + bzero((NAME).e, sizeof(struct uvm_history_ent) * (NAME).n); \ + LIST_INSERT_HEAD(&uvm_histories, &(NAME), list); \ +} while (0) + +extern int cold; + +#if defined(UVMHIST_PRINT) +extern int uvmhist_print_enabled; +#define UVMHIST_PRINTNOW(E) \ +do { \ + if (uvmhist_print_enabled) { \ + uvmhist_print(E); \ + DELAY(100000); \ + } \ +} while (0) +#else +#define UVMHIST_PRINTNOW(E) /* nothing */ +#endif + +#define UVMHIST_LOG(NAME,FMT,A,B,C,D) \ +do { \ + register int i, s = splhigh(); \ + simple_lock(&(NAME).l); \ + i = (NAME).f; \ + (NAME).f = (i + 1) % (NAME).n; \ + simple_unlock(&(NAME).l); \ + splx(s); \ + if (!cold) \ + microtime(&(NAME).e[i].tv); \ + (NAME).e[i].fmt = (FMT); \ + (NAME).e[i].fmtlen = strlen((NAME).e[i].fmt); \ + (NAME).e[i].fn = _uvmhist_name; \ + (NAME).e[i].fnlen = strlen((NAME).e[i].fn); \ + (NAME).e[i].call = _uvmhist_call; \ + (NAME).e[i].v[0] = (u_long)(A); \ + (NAME).e[i].v[1] = (u_long)(B); \ + (NAME).e[i].v[2] = (u_long)(C); \ + (NAME).e[i].v[3] = (u_long)(D); \ + UVMHIST_PRINTNOW(&((NAME).e[i])); \ +} while (0) + +#define UVMHIST_CALLED(NAME) \ +do { \ + { \ + int s = splhigh(); \ + simple_lock(&(NAME).l); \ + _uvmhist_call = _uvmhist_cnt++; \ + simple_unlock(&(NAME).l); \ + splx(s); \ + } \ + UVMHIST_LOG(NAME,"called!", 0, 0, 0, 0); \ +} while (0) + +#define UVMHIST_FUNC(FNAME) \ + static int _uvmhist_cnt = 0; \ + static char *_uvmhist_name = FNAME; \ + int _uvmhist_call; + +static __inline void uvmhist_print __P((struct uvm_history_ent *)); + +static __inline void +uvmhist_print(e) + struct uvm_history_ent *e; +{ + printf("%06ld.%06ld ", e->tv.tv_sec, e->tv.tv_usec); + printf("%s#%ld: ", e->fn, e->call); + printf(e->fmt, e->v[0], e->v[1], e->v[2], e->v[3]); + printf("\n"); +} +#endif /* UVMHIST */ + +#endif /* _UVM_UVM_STAT_H_ */ diff --git a/sys/uvm/uvm_swap.c b/sys/uvm/uvm_swap.c new file mode 100644 index 00000000000..9fb7611e7a5 --- /dev/null +++ b/sys/uvm/uvm_swap.c @@ -0,0 +1,1977 @@ +/* $NetBSD: uvm_swap.c,v 1.23 1998/12/26 06:25:59 marc Exp $ */ + +/* + * Copyright (c) 1995, 1996, 1997 Matthew R. Green + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: NetBSD: vm_swap.c,v 1.52 1997/12/02 13:47:37 pk Exp + * from: Id: uvm_swap.c,v 1.1.2.42 1998/02/02 20:38:06 chuck Exp + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/buf.h> +#include <sys/proc.h> +#include <sys/namei.h> +#include <sys/disklabel.h> +#include <sys/errno.h> +#include <sys/kernel.h> +#include <sys/malloc.h> +#include <sys/vnode.h> +#include <sys/file.h> +#include <sys/extent.h> +#include <sys/mount.h> +#include <sys/pool.h> +#include <sys/syscallargs.h> +#include <sys/swap.h> + +#include <vm/vm.h> +#include <vm/vm_conf.h> + +#include <uvm/uvm.h> + +#include <miscfs/specfs/specdev.h> + +/* + * uvm_swap.c: manage configuration and i/o to swap space. + */ + +/* + * swap space is managed in the following way: + * + * each swap partition or file is described by a "swapdev" structure. + * each "swapdev" structure contains a "swapent" structure which contains + * information that is passed up to the user (via system calls). + * + * each swap partition is assigned a "priority" (int) which controls + * swap parition usage. + * + * the system maintains a global data structure describing all swap + * partitions/files. there is a sorted LIST of "swappri" structures + * which describe "swapdev"'s at that priority. this LIST is headed + * by the "swap_priority" global var. each "swappri" contains a + * CIRCLEQ of "swapdev" structures at that priority. + * + * the system maintains a fixed pool of "swapbuf" structures for use + * at swap i/o time. a swapbuf includes a "buf" structure and an + * "aiodone" [we want to avoid malloc()'ing anything at swapout time + * since memory may be low]. + * + * locking: + * - swap_syscall_lock (sleep lock): this lock serializes the swapctl + * system call and prevents the swap priority list from changing + * while we are in the middle of a system call (e.g. SWAP_STATS). + * - swap_data_lock (simple_lock): this lock protects all swap data + * structures including the priority list, the swapdev structures, + * and the swapmap extent. + * - swap_buf_lock (simple_lock): this lock protects the free swapbuf + * pool. 
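 *
 * (a rough sketch of the resulting order, not from the imported
 * source) a configuration request is expected to serialize as:
 *
 *      lockmgr(&swap_syscall_lock, LK_EXCLUSIVE, (void *)0, p);
 *      simple_lock(&swap_data_lock);
 *      ... edit the priority list / swapdev entries ...
 *      simple_unlock(&swap_data_lock);
 *      lockmgr(&swap_syscall_lock, LK_RELEASE, (void *)0, p);
 *
 * the sleep lock is taken first and held across the whole system
 * call, while the simple lock only covers the short list updates.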
+ * + * each swap device has the following info: + * - swap device in use (could be disabled, preventing future use) + * - swap enabled (allows new allocations on swap) + * - map info in /dev/drum + * - vnode pointer + * for swap files only: + * - block size + * - max byte count in buffer + * - buffer + * - credentials to use when doing i/o to file + * + * userland controls and configures swap with the swapctl(2) system call. + * the sys_swapctl performs the following operations: + * [1] SWAP_NSWAP: returns the number of swap devices currently configured + * [2] SWAP_STATS: given a pointer to an array of swapent structures + * (passed in via "arg") of a size passed in via "misc" ... we load + * the current swap config into the array. + * [3] SWAP_ON: given a pathname in arg (could be device or file) and a + * priority in "misc", start swapping on it. + * [4] SWAP_OFF: as SWAP_ON, but stops swapping to a device + * [5] SWAP_CTL: changes the priority of a swap device (new priority in + * "misc") + */ + +/* + * SWAP_TO_FILES: allows swapping to plain files. + */ + +#define SWAP_TO_FILES + +/* + * swapdev: describes a single swap partition/file + * + * note the following should be true: + * swd_inuse <= swd_nblks [number of blocks in use is <= total blocks] + * swd_nblks <= swd_mapsize [because mapsize includes miniroot+disklabel] + */ +struct swapdev { + struct oswapent swd_ose; +#define swd_dev swd_ose.ose_dev /* device id */ +#define swd_flags swd_ose.ose_flags /* flags:inuse/enable/fake */ +#define swd_priority swd_ose.ose_priority /* our priority */ + /* also: swd_ose.ose_nblks, swd_ose.ose_inuse */ + char *swd_path; /* saved pathname of device */ + int swd_pathlen; /* length of pathname */ + int swd_npages; /* #pages we can use */ + int swd_npginuse; /* #pages in use */ + int swd_drumoffset; /* page0 offset in drum */ + int swd_drumsize; /* #pages in drum */ + struct extent *swd_ex; /* extent for this swapdev */ + struct vnode *swd_vp; /* backing vnode */ + CIRCLEQ_ENTRY(swapdev) swd_next; /* priority circleq */ + +#ifdef SWAP_TO_FILES + int swd_bsize; /* blocksize (bytes) */ + int swd_maxactive; /* max active i/o reqs */ + struct buf swd_tab; /* buffer list */ + struct ucred *swd_cred; /* cred for file access */ +#endif +}; + +/* + * swap device priority entry; the list is kept sorted on `spi_priority'. + */ +struct swappri { + int spi_priority; /* priority */ + CIRCLEQ_HEAD(spi_swapdev, swapdev) spi_swapdev; + /* circleq of swapdevs at this priority */ + LIST_ENTRY(swappri) spi_swappri; /* global list of pri's */ +}; + +/* + * swapbuf, swapbuffer plus async i/o info + */ +struct swapbuf { + struct buf sw_buf; /* a buffer structure */ + struct uvm_aiodesc sw_aio; /* aiodesc structure, used if ASYNC */ + SIMPLEQ_ENTRY(swapbuf) sw_sq; /* free list pointer */ +}; + +/* + * The following two structures are used to keep track of data transfers + * on swap devices associated with regular files. + * NOTE: this code is more or less a copy of vnd.c; we use the same + * structure names here to ease porting.. + */ +struct vndxfer { + struct buf *vx_bp; /* Pointer to parent buffer */ + struct swapdev *vx_sdp; + int vx_error; + int vx_pending; /* # of pending aux buffers */ + int vx_flags; +#define VX_BUSY 1 +#define VX_DEAD 2 +}; + +struct vndbuf { + struct buf vb_buf; + struct vndxfer *vb_xfer; +}; + + +/* + * We keep a of pool vndbuf's and vndxfer structures. 
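 *
 * (usage sketch, not from the imported source) the accessor macros
 * defined below hide the splbio/pool_get dance:
 *
 *      struct vndxfer *vnx;
 *
 *      getvndxfer(vnx);        allocates at splbio, may sleep
 *      ... fill in vnx, hand out the child vndbufs ...
 *      putvndxfer(vnx);        returns it to the pool
 *
 * vndbufs are handled the same way via getvndbuf/putvndbuf.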
+ */ +struct pool *vndxfer_pool; +struct pool *vndbuf_pool; + +#define getvndxfer(vnx) do { \ + int s = splbio(); \ + vnx = (struct vndxfer *) \ + pool_get(vndxfer_pool, PR_MALLOCOK|PR_WAITOK); \ + splx(s); \ +} while (0) + +#define putvndxfer(vnx) { \ + pool_put(vndxfer_pool, (void *)(vnx)); \ +} + +#define getvndbuf(vbp) do { \ + int s = splbio(); \ + vbp = (struct vndbuf *) \ + pool_get(vndbuf_pool, PR_MALLOCOK|PR_WAITOK); \ + splx(s); \ +} while (0) + +#define putvndbuf(vbp) { \ + pool_put(vndbuf_pool, (void *)(vbp)); \ +} + + +/* + * local variables + */ +static struct extent *swapmap; /* controls the mapping of /dev/drum */ +SIMPLEQ_HEAD(swapbufhead, swapbuf); +struct pool *swapbuf_pool; + +/* list of all active swap devices [by priority] */ +LIST_HEAD(swap_priority, swappri); +static struct swap_priority swap_priority; + +/* locks */ +lock_data_t swap_syscall_lock; +static simple_lock_data_t swap_data_lock; + +/* + * prototypes + */ +#ifdef notyet +static void swapdrum_add __P((struct swapdev *, int)); +#endif +static struct swapdev *swapdrum_getsdp __P((int)); + +#ifdef notyet /* swapctl */ +static struct swapdev *swaplist_find __P((struct vnode *, int)); +static void swaplist_insert __P((struct swapdev *, + struct swappri *, int)); +static void swaplist_trim __P((void)); + +static int swap_on __P((struct proc *, struct swapdev *)); +#endif +#ifdef SWAP_OFF_WORKS +static int swap_off __P((struct proc *, struct swapdev *)); +#endif + +#ifdef SWAP_TO_FILES +static void sw_reg_strategy __P((struct swapdev *, struct buf *, int)); +static void sw_reg_iodone __P((struct buf *)); +static void sw_reg_start __P((struct swapdev *)); +#endif + +static void uvm_swap_aiodone __P((struct uvm_aiodesc *)); +static void uvm_swap_bufdone __P((struct buf *)); +static int uvm_swap_io __P((struct vm_page **, int, int, int)); + +/* + * uvm_swap_init: init the swap system data structures and locks + * + * => called at boot time from init_main.c after the filesystems + * are brought up (which happens after uvm_init()) + */ +void +uvm_swap_init() +{ + UVMHIST_FUNC("uvm_swap_init"); + + UVMHIST_CALLED(pdhist); + /* + * first, init the swap list, its counter, and its lock. + * then get a handle on the vnode for /dev/drum by using + * the its dev_t number ("swapdev", from MD conf.c). + */ + + LIST_INIT(&swap_priority); + uvmexp.nswapdev = 0; + lockinit(&swap_syscall_lock, PVM, "swapsys", 0, 0); + simple_lock_init(&swap_data_lock); + + if (bdevvp(swapdev, &swapdev_vp)) + panic("uvm_swap_init: can't get vnode for swap device"); + + /* + * create swap block resource map to map /dev/drum. the range + * from 1 to INT_MAX allows 2 gigablocks of swap space. note + * that block 0 is reserved (used to indicate an allocation + * failure, or no allocation). + */ + swapmap = extent_create("swapmap", 1, INT_MAX, + M_VMSWAP, 0, 0, EX_NOWAIT); + if (swapmap == 0) + panic("uvm_swap_init: extent_create failed"); + + /* + * allocate our private pool of "swapbuf" structures (includes + * a "buf" structure). ["nswbuf" comes from param.c and can + * be adjusted by MD code before we get here]. + */ + + swapbuf_pool = + pool_create(sizeof(struct swapbuf), 0, 0, 0, "swp buf", 0, + NULL, NULL, 0); + if (swapbuf_pool == NULL) + panic("swapinit: pool_create failed"); + /* XXX - set a maximum on swapbuf_pool? 
*/ + + vndxfer_pool = + pool_create(sizeof(struct vndxfer), 0, 0, 0, "swp vnx", 0, + NULL, NULL, 0); + if (vndxfer_pool == NULL) + panic("swapinit: pool_create failed"); + + vndbuf_pool = + pool_create(sizeof(struct vndbuf), 0, 0, 0, "swp vnd", 0, + NULL, NULL, 0); + if (vndbuf_pool == NULL) + panic("swapinit: pool_create failed"); + /* + * done! + */ + UVMHIST_LOG(pdhist, "<- done", 0, 0, 0, 0); +} + +/* + * swaplist functions: functions that operate on the list of swap + * devices on the system. + */ + +/* + * swaplist_insert: insert swap device "sdp" into the global list + * + * => caller must hold both swap_syscall_lock and swap_data_lock + * => caller must provide a newly malloc'd swappri structure (we will + * FREE it if we don't need it... this it to prevent malloc blocking + * here while adding swap) + */ +#ifdef notyet /* used by swapctl */ +static void +swaplist_insert(sdp, newspp, priority) + struct swapdev *sdp; + struct swappri *newspp; + int priority; +{ + struct swappri *spp, *pspp; + UVMHIST_FUNC("swaplist_insert"); UVMHIST_CALLED(pdhist); + + /* + * find entry at or after which to insert the new device. + */ + for (pspp = NULL, spp = swap_priority.lh_first; spp != NULL; + spp = spp->spi_swappri.le_next) { + if (priority <= spp->spi_priority) + break; + pspp = spp; + } + + /* + * new priority? + */ + if (spp == NULL || spp->spi_priority != priority) { + spp = newspp; /* use newspp! */ + UVMHIST_LOG(pdhist, "created new swappri = %d", priority, 0, 0, 0); + + spp->spi_priority = priority; + CIRCLEQ_INIT(&spp->spi_swapdev); + + if (pspp) + LIST_INSERT_AFTER(pspp, spp, spi_swappri); + else + LIST_INSERT_HEAD(&swap_priority, spp, spi_swappri); + } else { + /* we don't need a new priority structure, free it */ + FREE(newspp, M_VMSWAP); + } + + /* + * priority found (or created). now insert on the priority's + * circleq list and bump the total number of swapdevs. + */ + sdp->swd_priority = priority; + CIRCLEQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next); + uvmexp.nswapdev++; + + /* + * done! + */ +} +#endif + +#ifdef notyet /* used by swapctl */ +/* + * swaplist_find: find and optionally remove a swap device from the + * global list. + * + * => caller must hold both swap_syscall_lock and swap_data_lock + * => we return the swapdev we found (and removed) + */ +static struct swapdev * +swaplist_find(vp, remove) + struct vnode *vp; + boolean_t remove; +{ + struct swapdev *sdp; + struct swappri *spp; + + /* + * search the lists for the requested vp + */ + for (spp = swap_priority.lh_first; spp != NULL; + spp = spp->spi_swappri.le_next) { + for (sdp = spp->spi_swapdev.cqh_first; + sdp != (void *)&spp->spi_swapdev; + sdp = sdp->swd_next.cqe_next) + if (sdp->swd_vp == vp) { + if (remove) { + CIRCLEQ_REMOVE(&spp->spi_swapdev, + sdp, swd_next); + uvmexp.nswapdev--; + } + return(sdp); + } + } + return (NULL); +} + + +/* + * swaplist_trim: scan priority list for empty priority entries and kill + * them. + * + * => caller must hold both swap_syscall_lock and swap_data_lock + */ +static void +swaplist_trim() +{ + struct swappri *spp, *nextspp; + + for (spp = swap_priority.lh_first; spp != NULL; spp = nextspp) { + nextspp = spp->spi_swappri.le_next; + if (spp->spi_swapdev.cqh_first != (void *)&spp->spi_swapdev) + continue; + LIST_REMOVE(spp, spi_swappri); + free((caddr_t)spp, M_VMSWAP); + } +} + +/* + * swapdrum_add: add a "swapdev"'s blocks into /dev/drum's area. 
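 *
 * (illustration, not from the imported source; the exact offsets
 * are assumptions) with a freshly created swapmap extent, which
 * starts at block 1, two devices added in turn would typically end
 * up laid out as:
 *
 *      swapdev A (1000 pages): swd_drumoffset = 1,    swd_drumsize = 1000
 *      swapdev B ( 500 pages): swd_drumoffset = 1001, swd_drumsize = 500
 *
 * swapdrum_getsdp() later inverts the mapping by range-checking a
 * drum page number against each device's
 * [swd_drumoffset, swd_drumoffset + swd_drumsize) window.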
+ * + * => caller must hold swap_syscall_lock + * => swap_data_lock should be unlocked (we may sleep) + */ +static void +swapdrum_add(sdp, npages) + struct swapdev *sdp; + int npages; +{ + u_long result; + + if (extent_alloc(swapmap, npages, EX_NOALIGN, EX_NOBOUNDARY, + EX_WAITOK, &result)) + panic("swapdrum_add"); + + sdp->swd_drumoffset = result; + sdp->swd_drumsize = npages; +} +#endif + +/* + * swapdrum_getsdp: given a page offset in /dev/drum, convert it back + * to the "swapdev" that maps that section of the drum. + * + * => each swapdev takes one big contig chunk of the drum + * => caller must hold swap_data_lock + */ +static struct swapdev * +swapdrum_getsdp(pgno) + int pgno; +{ + struct swapdev *sdp; + struct swappri *spp; + + for (spp = swap_priority.lh_first; spp != NULL; + spp = spp->spi_swappri.le_next) + for (sdp = spp->spi_swapdev.cqh_first; + sdp != (void *)&spp->spi_swapdev; + sdp = sdp->swd_next.cqe_next) + if (pgno >= sdp->swd_drumoffset && + pgno < (sdp->swd_drumoffset + sdp->swd_drumsize)) { + return sdp; + } + return NULL; +} + + +/*XXX + *XXX + *XXX*/ +int +sys_swapon(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + return EINVAL; +} + +#ifdef notyet /* XXXXXXXXXXXXXXXX (it has other bugs beside the fact that I don't want to change syscalls.master) */ +/* + * sys_swapctl: main entry point for swapctl(2) system call + * [with two helper functions: swap_on and swap_off] + */ +int +sys_swapctl(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + struct sys_swapctl_args /* { + syscallarg(int) cmd; + syscallarg(void *) arg; + syscallarg(int) misc; + } */ *uap = (struct sys_swapctl_args *)v; + struct vnode *vp; + struct nameidata nd; + struct swappri *spp; + struct swapdev *sdp; + struct swapent *sep; + char userpath[PATH_MAX + 1]; + size_t len; + int count, error, misc; + int priority; + UVMHIST_FUNC("sys_swapctl"); UVMHIST_CALLED(pdhist); + + misc = SCARG(uap, misc); + + /* + * ensure serialized syscall access by grabbing the swap_syscall_lock + */ + lockmgr(&swap_syscall_lock, LK_EXCLUSIVE, (void *)0, p); + + /* + * we handle the non-priv NSWAP and STATS request first. + * + * SWAP_NSWAP: return number of config'd swap devices + * [can also be obtained with uvmexp sysctl] + */ + if (SCARG(uap, cmd) == SWAP_NSWAP) { + UVMHIST_LOG(pdhist, "<- done SWAP_NSWAP=%d", uvmexp.nswapdev, + 0, 0, 0); + *retval = uvmexp.nswapdev; + error = 0; + goto out; + } + + /* + * SWAP_STATS: get stats on current # of configured swap devs + * + * note that the swap_priority list can't change as long + * as we are holding the swap_syscall_lock. we don't want + * to grab the swap_data_lock because we may fault&sleep during + * copyout() and we don't want to be holding that lock then! + */ + if (SCARG(uap, cmd) == SWAP_STATS +#if defined(COMPAT_13) + || SCARG(uap, cmd) == SWAP_OSTATS +#endif + ) { + sep = (struct swapent *)SCARG(uap, arg); + count = 0; + + for (spp = swap_priority.lh_first; spp != NULL; + spp = spp->spi_swappri.le_next) { + for (sdp = spp->spi_swapdev.cqh_first; + sdp != (void *)&spp->spi_swapdev && misc-- > 0; + sdp = sdp->swd_next.cqe_next) { + /* + * backwards compatibility for system call. + * note that we use 'struct oswapent' as an + * overlay into both 'struct swapdev' and + * the userland 'struct swapent', as we + * want to retain backwards compatibility + * with NetBSD 1.3. 
+ */ + sdp->swd_ose.ose_inuse = + btodb(sdp->swd_npginuse << PAGE_SHIFT); + error = copyout((caddr_t)&sdp->swd_ose, + (caddr_t)sep, sizeof(struct oswapent)); + + /* now copy out the path if necessary */ +#if defined(COMPAT_13) + if (error == 0 && SCARG(uap, cmd) == SWAP_STATS) +#else + if (error == 0) +#endif + error = copyout((caddr_t)sdp->swd_path, + (caddr_t)&sep->se_path, + sdp->swd_pathlen); + + if (error) + goto out; + count++; +#if defined(COMPAT_13) + if (SCARG(uap, cmd) == SWAP_OSTATS) + ((struct oswapent *)sep)++; + else +#endif + sep++; + } + } + + UVMHIST_LOG(pdhist, "<- done SWAP_STATS", 0, 0, 0, 0); + + *retval = count; + error = 0; + goto out; + } + + /* + * all other requests require superuser privs. verify. + */ + if ((error = suser(p->p_ucred, &p->p_acflag))) + goto out; + + /* + * at this point we expect a path name in arg. we will + * use namei() to gain a vnode reference (vref), and lock + * the vnode (VOP_LOCK). + * + * XXX: a NULL arg means use the root vnode pointer (e.g. for + * miniroot) + */ + if (SCARG(uap, arg) == NULL) { + vp = rootvp; /* miniroot */ + if (vget(vp, LK_EXCLUSIVE)) { + error = EBUSY; + goto out; + } + if (SCARG(uap, cmd) == SWAP_ON && + copystr("miniroot", userpath, sizeof userpath, &len)) + panic("swapctl: miniroot copy failed"); + } else { + int space; + char *where; + + if (SCARG(uap, cmd) == SWAP_ON) { + if ((error = copyinstr(SCARG(uap, arg), userpath, + sizeof userpath, &len))) + goto out; + space = UIO_SYSSPACE; + where = userpath; + } else { + space = UIO_USERSPACE; + where = (char *)SCARG(uap, arg); + } + NDINIT(&nd, LOOKUP, FOLLOW|LOCKLEAF, space, where, p); + if ((error = namei(&nd))) + goto out; + vp = nd.ni_vp; + } + /* note: "vp" is referenced and locked */ + + error = 0; /* assume no error */ + switch(SCARG(uap, cmd)) { + case SWAP_CTL: + /* + * get new priority, remove old entry (if any) and then + * reinsert it in the correct place. finally, prune out + * any empty priority structures. + */ + priority = SCARG(uap, misc); + spp = (struct swappri *) + malloc(sizeof *spp, M_VMSWAP, M_WAITOK); + simple_lock(&swap_data_lock); + if ((sdp = swaplist_find(vp, 1)) == NULL) { + error = ENOENT; + } else { + swaplist_insert(sdp, spp, priority); + swaplist_trim(); + } + simple_unlock(&swap_data_lock); + if (error) + free(spp, M_VMSWAP); + break; + + case SWAP_ON: + /* + * check for duplicates. if none found, then insert a + * dummy entry on the list to prevent someone else from + * trying to enable this device while we are working on + * it. + */ + priority = SCARG(uap, misc); + simple_lock(&swap_data_lock); + if ((sdp = swaplist_find(vp, 0)) != NULL) { + error = EBUSY; + simple_unlock(&swap_data_lock); + break; + } + sdp = (struct swapdev *) + malloc(sizeof *sdp, M_VMSWAP, M_WAITOK); + spp = (struct swappri *) + malloc(sizeof *spp, M_VMSWAP, M_WAITOK); + bzero(sdp, sizeof(*sdp)); + sdp->swd_flags = SWF_FAKE; /* placeholder only */ + sdp->swd_vp = vp; + sdp->swd_dev = (vp->v_type == VBLK) ? vp->v_rdev : NODEV; +#ifdef SWAP_TO_FILES + /* + * XXX Is NFS elaboration necessary? + */ + if (vp->v_type == VREG) + sdp->swd_cred = crdup(p->p_ucred); +#endif + swaplist_insert(sdp, spp, priority); + simple_unlock(&swap_data_lock); + + sdp->swd_pathlen = len; + sdp->swd_path = malloc(sdp->swd_pathlen, M_VMSWAP, M_WAITOK); + if (copystr(userpath, sdp->swd_path, sdp->swd_pathlen, 0) != 0) + panic("swapctl: copystr"); + /* + * we've now got a FAKE placeholder in the swap list. + * now attempt to enable swap on it. 
if we fail, undo + * what we've done and kill the fake entry we just inserted. + * if swap_on is a success, it will clear the SWF_FAKE flag + */ + if ((error = swap_on(p, sdp)) != 0) { + simple_lock(&swap_data_lock); + (void) swaplist_find(vp, 1); /* kill fake entry */ + swaplist_trim(); + simple_unlock(&swap_data_lock); +#ifdef SWAP_TO_FILES + if (vp->v_type == VREG) + crfree(sdp->swd_cred); +#endif + free(sdp->swd_path, M_VMSWAP); + free((caddr_t)sdp, M_VMSWAP); + break; + } + + /* + * got it! now add a second reference to vp so that + * we keep a reference to the vnode after we return. + */ + vref(vp); + break; + + case SWAP_OFF: + UVMHIST_LOG(pdhist, "someone is using SWAP_OFF...??", 0,0,0,0); +#ifdef SWAP_OFF_WORKS + /* + * find the entry of interest and ensure it is enabled. + */ + simple_lock(&swap_data_lock); + if ((sdp = swaplist_find(vp, 0)) == NULL) { + simple_unlock(&swap_data_lock); + error = ENXIO; + break; + } + /* + * If a device isn't in use or enabled, we + * can't stop swapping from it (again). + */ + if ((sdp->swd_flags & (SWF_INUSE|SWF_ENABLE)) == 0) { + simple_unlock(&swap_data_lock); + error = EBUSY; + break; + } + /* XXXCDC: should we call with list locked or unlocked? */ + if ((error = swap_off(p, sdp)) != 0) + break; + /* XXXCDC: might need relock here */ + + /* + * now we can kill the entry. + */ + if ((sdp = swaplist_find(vp, 1)) == NULL) { + error = ENXIO; + break; + } + simple_unlock(&swap_data_lock); + free((caddr_t)sdp, M_VMSWAP); +#else + error = EINVAL; +#endif + break; + + default: + UVMHIST_LOG(pdhist, "unhandled command: %#x", + SCARG(uap, cmd), 0, 0, 0); + error = EINVAL; + } + + /* + * done! use vput to drop our reference and unlock + */ + vput(vp); +out: + lockmgr(&swap_syscall_lock, LK_RELEASE, (void *)0, p); + + UVMHIST_LOG(pdhist, "<- done! error=%d", error, 0, 0, 0); + return (error); +} +#endif + + +/* + * swap_on: attempt to enable a swapdev for swapping. note that the + * swapdev is already on the global list, but disabled (marked + * SWF_FAKE). + * + * => we avoid the start of the disk (to protect disk labels) + * => we also avoid the miniroot, if we are swapping to root. + * => caller should leave swap_data_lock unlocked, we may lock it + * if needed. + */ +#ifdef notyet /* used by swapctl */ +static int +swap_on(p, sdp) + struct proc *p; + struct swapdev *sdp; +{ + static int count = 0; /* static */ + struct vnode *vp; + int error, npages, nblocks, size; + long addr; +#ifdef SWAP_TO_FILES + struct vattr va; +#endif +#ifdef NFS + extern int (**nfsv2_vnodeop_p) __P((void *)); +#endif /* NFS */ + dev_t dev; + char *name; + UVMHIST_FUNC("swap_on"); UVMHIST_CALLED(pdhist); + + /* + * we want to enable swapping on sdp. the swd_vp contains + * the vnode we want (locked and ref'd), and the swd_dev + * contains the dev_t of the file, if it a block device. + */ + + vp = sdp->swd_vp; + dev = sdp->swd_dev; + + /* + * open the swap file (mostly useful for block device files to + * let device driver know what is up). + * + * we skip the open/close for root on swap because the root + * has already been opened when root was mounted (mountroot). + */ + if (vp != rootvp) { + if ((error = VOP_OPEN(vp, FREAD|FWRITE, p->p_ucred, p))) + return (error); + } + + /* XXX this only works for block devices */ + UVMHIST_LOG(pdhist, " dev=%d, major(dev)=%d", dev, major(dev), 0,0); + + /* + * we now need to determine the size of the swap area. for + * block specials we can call the d_psize function. + * for normal files, we must stat [get attrs]. 
+ * + * we put the result in nblks. + * for normal files, we also want the filesystem block size + * (which we get with statfs). + */ + switch (vp->v_type) { + case VBLK: + if (bdevsw[major(dev)].d_psize == 0 || + (nblocks = (*bdevsw[major(dev)].d_psize)(dev)) == -1) { + error = ENXIO; + goto bad; + } + break; + +#ifdef SWAP_TO_FILES + case VREG: + if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p))) + goto bad; + nblocks = (int)btodb(va.va_size); + if ((error = + VFS_STATFS(vp->v_mount, &vp->v_mount->mnt_stat, p)) != 0) + goto bad; + + sdp->swd_bsize = vp->v_mount->mnt_stat.f_iosize; + /* + * limit the max # of outstanding I/O requests we issue + * at any one time. take it easy on NFS servers. + */ +#ifdef NFS + if (vp->v_op == nfsv2_vnodeop_p) + sdp->swd_maxactive = 2; /* XXX */ + else +#endif /* NFS */ + sdp->swd_maxactive = 8; /* XXX */ + break; +#endif + + default: + error = ENXIO; + goto bad; + } + + /* + * save nblocks in a safe place and convert to pages. + */ + + sdp->swd_ose.ose_nblks = nblocks; + npages = dbtob((u_int64_t)nblocks) >> PAGE_SHIFT; + + /* + * for block special files, we want to make sure that leave + * the disklabel and bootblocks alone, so we arrange to skip + * over them (randomly choosing to skip PAGE_SIZE bytes). + * note that because of this the "size" can be less than the + * actual number of blocks on the device. + */ + if (vp->v_type == VBLK) { + /* we use pages 1 to (size - 1) [inclusive] */ + size = npages - 1; + addr = 1; + } else { + /* we use pages 0 to (size - 1) [inclusive] */ + size = npages; + addr = 0; + } + + /* + * make sure we have enough blocks for a reasonable sized swap + * area. we want at least one page. + */ + + if (size < 1) { + UVMHIST_LOG(pdhist, " size <= 1!!", 0, 0, 0, 0); + error = EINVAL; + goto bad; + } + + UVMHIST_LOG(pdhist, " dev=%x: size=%d addr=%ld\n", dev, size, addr, 0); + + /* + * now we need to allocate an extent to manage this swap device + */ + name = malloc(12, M_VMSWAP, M_WAITOK); + sprintf(name, "swap0x%04x", count++); + + /* note that extent_create's 3rd arg is inclusive, thus "- 1" */ + sdp->swd_ex = extent_create(name, 0, npages - 1, M_VMSWAP, + 0, 0, EX_WAITOK); + /* allocate the `saved' region from the extent so it won't be used */ + if (addr) { + if (extent_alloc_region(sdp->swd_ex, 0, addr, EX_WAITOK)) + panic("disklabel region"); + sdp->swd_npginuse += addr; + uvmexp.swpginuse += addr; + } + + + /* + * if the vnode we are swapping to is the root vnode + * (i.e. we are swapping to the miniroot) then we want + * to make sure we don't overwrite it. do a statfs to + * find its size and skip over it. + */ + if (vp == rootvp) { + struct mount *mp; + struct statfs *sp; + int rootblocks, rootpages; + + mp = rootvnode->v_mount; + sp = &mp->mnt_stat; + rootblocks = sp->f_blocks * btodb(sp->f_bsize); + rootpages = round_page(dbtob(rootblocks)) >> PAGE_SHIFT; + if (rootpages > npages) + panic("swap_on: miniroot larger than swap?"); + + if (extent_alloc_region(sdp->swd_ex, addr, + rootpages, EX_WAITOK)) + panic("swap_on: unable to preserve miniroot"); + + sdp->swd_npginuse += (rootpages - addr); + uvmexp.swpginuse += (rootpages - addr); + + printf("Preserved %d pages of miniroot ", rootpages); + printf("leaving %d pages of swap\n", size - rootpages); + } + + /* + * now add the new swapdev to the drum and enable. 
+ */ + simple_lock(&swap_data_lock); + swapdrum_add(sdp, npages); + sdp->swd_npages = npages; + sdp->swd_flags &= ~SWF_FAKE; /* going live */ + sdp->swd_flags |= (SWF_INUSE|SWF_ENABLE); + simple_unlock(&swap_data_lock); + uvmexp.swpages += npages; + + /* + * add anon's to reflect the swap space we added + */ + uvm_anon_add(size); + +#if 0 + /* + * At this point we could arrange to reserve memory for the + * swap buffer pools. + * + * I don't think this is necessary, since swapping starts well + * ahead of serious memory deprivation and the memory resource + * pools hold on to actively used memory. This should ensure + * we always have some resources to continue operation. + */ + + int s = splbio(); + int n = 8 * sdp->swd_maxactive; + + (void)pool_prime(swapbuf_pool, n, 0); + + if (vp->v_type == VREG) { + /* Allocate additional vnx and vnd buffers */ + /* + * Allocation Policy: + * (8 * swd_maxactive) vnx headers per swap dev + * (16 * swd_maxactive) vnd buffers per swap dev + */ + + n = 8 * sdp->swd_maxactive; + (void)pool_prime(vndxfer_pool, n, 0); + + n = 16 * sdp->swd_maxactive; + (void)pool_prime(vndbuf_pool, n, 0); + } + splx(s); +#endif + + return (0); + +bad: + /* + * failure: close device if necessary and return error. + */ + if (vp != rootvp) + (void)VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p); + return (error); +} +#endif + +#ifdef SWAP_OFF_WORKS +/* + * swap_off: stop swapping on swapdev + * + * XXXCDC: what conditions go here? + */ +static int +swap_off(p, sdp) + struct proc *p; + struct swapdev *sdp; +{ + char *name; + UVMHIST_FUNC("swap_off"); UVMHIST_CALLED(pdhist); + + /* turn off the enable flag */ + sdp->swd_flags &= ~SWF_ENABLE; + + UVMHIST_LOG(pdhist, " dev=%x", sdp->swd_dev); + + /* + * XXX write me + * + * the idea is to find out which processes are using this swap + * device, and page them all in. + * + * eventually, we should try to move them out to other swap areas + * if available. + * + * The alternative is to create a redirection map for this swap + * device. This should work by moving all the pages of data from + * the ex-swap device to another one, and making an entry in the + * redirection map for it. locking is going to be important for + * this! 
+ * + * XXXCDC: also need to shrink anon pool + */ + + /* until the above code is written, we must ENODEV */ + return ENODEV; + + extent_free(swapmap, sdp->swd_mapoffset, sdp->swd_mapsize, EX_WAITOK); + name = sdp->swd_ex->ex_name; + extent_destroy(sdp->swd_ex); + free(name, M_VMSWAP); + free((caddr_t)sdp->swd_ex, M_VMSWAP); + if (sdp->swp_vp != rootvp) + (void) VOP_CLOSE(sdp->swd_vp, FREAD|FWRITE, p->p_ucred, p); + if (sdp->swd_vp) + vrele(sdp->swd_vp); + free((caddr_t)sdp, M_VMSWAP); + return (0); +} +#endif + +/* + * /dev/drum interface and i/o functions + */ + +/* + * swread: the read function for the drum (just a call to physio) + */ +/*ARGSUSED*/ +int +swread(dev, uio, ioflag) + dev_t dev; + struct uio *uio; + int ioflag; +{ + UVMHIST_FUNC("swread"); UVMHIST_CALLED(pdhist); + + UVMHIST_LOG(pdhist, " dev=%x offset=%qx", dev, uio->uio_offset, 0, 0); + return (physio(swstrategy, NULL, dev, B_READ, minphys, uio)); +} + +/* + * swwrite: the write function for the drum (just a call to physio) + */ +/*ARGSUSED*/ +int +swwrite(dev, uio, ioflag) + dev_t dev; + struct uio *uio; + int ioflag; +{ + UVMHIST_FUNC("swwrite"); UVMHIST_CALLED(pdhist); + + UVMHIST_LOG(pdhist, " dev=%x offset=%qx", dev, uio->uio_offset, 0, 0); + return (physio(swstrategy, NULL, dev, B_WRITE, minphys, uio)); +} + +/* + * swstrategy: perform I/O on the drum + * + * => we must map the i/o request from the drum to the correct swapdev. + */ +void +swstrategy(bp) + struct buf *bp; +{ + struct swapdev *sdp; + struct vnode *vp; + int pageno; + int bn; + UVMHIST_FUNC("swstrategy"); UVMHIST_CALLED(pdhist); + + /* + * convert block number to swapdev. note that swapdev can't + * be yanked out from under us because we are holding resources + * in it (i.e. the blocks we are doing I/O on). + */ + pageno = dbtob(bp->b_blkno) >> PAGE_SHIFT; + simple_lock(&swap_data_lock); + sdp = swapdrum_getsdp(pageno); + simple_unlock(&swap_data_lock); + if (sdp == NULL) { + bp->b_error = EINVAL; + bp->b_flags |= B_ERROR; + biodone(bp); + UVMHIST_LOG(pdhist, " failed to get swap device", 0, 0, 0, 0); + return; + } + + /* + * convert drum page number to block number on this swapdev. + */ + + pageno = pageno - sdp->swd_drumoffset; /* page # on swapdev */ + bn = btodb(pageno << PAGE_SHIFT); /* convert to diskblock */ + + UVMHIST_LOG(pdhist, " %s: mapoff=%x bn=%x bcount=%ld\n", + ((bp->b_flags & B_READ) == 0) ? "write" : "read", + sdp->swd_drumoffset, bn, bp->b_bcount); + + + /* + * for block devices we finish up here. + * for regular files we have to do more work which we deligate + * to sw_reg_strategy(). + */ + + switch (sdp->swd_vp->v_type) { + default: + panic("swstrategy: vnode type 0x%x", sdp->swd_vp->v_type); + case VBLK: + + /* + * must convert "bp" from an I/O on /dev/drum to an I/O + * on the swapdev (sdp). + */ + bp->b_blkno = bn; /* swapdev block number */ + vp = sdp->swd_vp; /* swapdev vnode pointer */ + bp->b_dev = sdp->swd_dev; /* swapdev dev_t */ + VHOLD(vp); /* "hold" swapdev vp for i/o */ + + /* + * if we are doing a write, we have to redirect the i/o on + * drum's v_numoutput counter to the swapdevs. 
+ */ + if ((bp->b_flags & B_READ) == 0) { + int s = splbio(); + vwakeup(bp); /* kills one 'v_numoutput' on drum */ + vp->v_numoutput++; /* put it on swapdev */ + splx(s); + } + + /* + * disassociate buffer with /dev/drum vnode + * [could be null if buf was from physio] + */ + if (bp->b_vp != NULLVP) + brelvp(bp); + + /* + * finally plug in swapdev vnode and start I/O + */ + bp->b_vp = vp; + VOP_STRATEGY(bp); + return; +#ifdef SWAP_TO_FILES + case VREG: + /* + * delegate to sw_reg_strategy function. + */ + sw_reg_strategy(sdp, bp, bn); + return; +#endif + } + /* NOTREACHED */ +} + +#ifdef SWAP_TO_FILES +/* + * sw_reg_strategy: handle swap i/o to regular files + */ +static void +sw_reg_strategy(sdp, bp, bn) + struct swapdev *sdp; + struct buf *bp; + int bn; +{ + struct vnode *vp; + struct vndxfer *vnx; + daddr_t nbn, byteoff; + caddr_t addr; + int s, off, nra, error, sz, resid; + UVMHIST_FUNC("sw_reg_strategy"); UVMHIST_CALLED(pdhist); + + /* + * allocate a vndxfer head for this transfer and point it to + * our buffer. + */ + getvndxfer(vnx); + vnx->vx_flags = VX_BUSY; + vnx->vx_error = 0; + vnx->vx_pending = 0; + vnx->vx_bp = bp; + vnx->vx_sdp = sdp; + + /* + * setup for main loop where we read filesystem blocks into + * our buffer. + */ + error = 0; + bp->b_resid = bp->b_bcount; /* nothing transferred yet! */ + addr = bp->b_data; /* current position in buffer */ + byteoff = dbtob(bn); + + for (resid = bp->b_resid; resid; resid -= sz) { + struct vndbuf *nbp; + + /* + * translate byteoffset into block number. return values: + * vp = vnode of underlying device + * nbn = new block number (on underlying vnode dev) + * nra = num blocks we can read-ahead (excludes requested + * block) + */ + nra = 0; + error = VOP_BMAP(sdp->swd_vp, byteoff / sdp->swd_bsize, + &vp, &nbn, &nra); + + if (error == 0 && (long)nbn == -1) { + /* + * this used to just set error, but that doesn't + * do the right thing. Instead, it causes random + * memory errors. The panic() should remain until + * this condition doesn't destabilize the system. + */ +#if 1 + panic("sw_reg_strategy: swap to sparse file"); +#else + error = EIO; /* failure */ +#endif + } + + /* + * punt if there was an error or a hole in the file. + * we must wait for any i/o ops we have already started + * to finish before returning. + * + * XXX we could deal with holes here but it would be + * a hassle (in the write case). + */ + if (error) { + s = splbio(); + vnx->vx_error = error; /* pass error up */ + goto out; + } + + /* + * compute the size ("sz") of this transfer (in bytes). + * XXXCDC: ignores read-ahead for non-zero offset + */ + if ((off = (byteoff % sdp->swd_bsize)) != 0) + sz = sdp->swd_bsize - off; + else + sz = (1 + nra) * sdp->swd_bsize; + + if (resid < sz) + sz = resid; + + UVMHIST_LOG(pdhist, "sw_reg_strategy: vp %p/%p offset 0x%x/0x%x", + sdp->swd_vp, vp, byteoff, nbn); + + /* + * now get a buf structure. note that the vb_buf is + * at the front of the nbp structure so that you can + * cast pointers between the two structures easily. + */ + getvndbuf(nbp); + nbp->vb_buf.b_flags = bp->b_flags | B_CALL; + nbp->vb_buf.b_bcount = sz; +#if 0 + nbp->vb_buf.b_bufsize = bp->b_bufsize; /* XXXCDC: really? 
*/ +#endif + nbp->vb_buf.b_bufsize = sz; + nbp->vb_buf.b_error = 0; + nbp->vb_buf.b_data = addr; + nbp->vb_buf.b_blkno = nbn + btodb(off); + nbp->vb_buf.b_proc = bp->b_proc; + nbp->vb_buf.b_iodone = sw_reg_iodone; + nbp->vb_buf.b_vp = NULLVP; + nbp->vb_buf.b_vnbufs.le_next = NOLIST; + nbp->vb_buf.b_rcred = sdp->swd_cred; + nbp->vb_buf.b_wcred = sdp->swd_cred; + + /* + * set b_dirtyoff/end and b_validoff/end. this is + * required by the NFS client code (otherwise it will + * just discard our I/O request). + */ + if (bp->b_dirtyend == 0) { + nbp->vb_buf.b_dirtyoff = 0; + nbp->vb_buf.b_dirtyend = sz; + } else { + nbp->vb_buf.b_dirtyoff = + max(0, bp->b_dirtyoff - (bp->b_bcount-resid)); + nbp->vb_buf.b_dirtyend = + min(sz, + max(0, bp->b_dirtyend - (bp->b_bcount-resid))); + } + if (bp->b_validend == 0) { + nbp->vb_buf.b_validoff = 0; + nbp->vb_buf.b_validend = sz; + } else { + nbp->vb_buf.b_validoff = + max(0, bp->b_validoff - (bp->b_bcount-resid)); + nbp->vb_buf.b_validend = + min(sz, + max(0, bp->b_validend - (bp->b_bcount-resid))); + } + + nbp->vb_xfer = vnx; /* patch it back in to vnx */ + + /* + * Just sort by block number + */ + nbp->vb_buf.b_cylinder = nbp->vb_buf.b_blkno; + s = splbio(); + if (vnx->vx_error != 0) { + putvndbuf(nbp); + goto out; + } + vnx->vx_pending++; + + /* assoc new buffer with underlying vnode */ + bgetvp(vp, &nbp->vb_buf); + + /* sort it in and start I/O if we are not over our limit */ + disksort(&sdp->swd_tab, &nbp->vb_buf); + sw_reg_start(sdp); + splx(s); + + /* + * advance to the next I/O + */ + byteoff += sz; + addr += sz; + } + + s = splbio(); + +out: /* Arrive here at splbio */ + vnx->vx_flags &= ~VX_BUSY; + if (vnx->vx_pending == 0) { + if (vnx->vx_error != 0) { + bp->b_error = vnx->vx_error; + bp->b_flags |= B_ERROR; + } + putvndxfer(vnx); + biodone(bp); + } + splx(s); +} + +/* + * sw_reg_start: start an I/O request on the requested swapdev + * + * => reqs are sorted by disksort (above) + */ +static void +sw_reg_start(sdp) + struct swapdev *sdp; +{ + struct buf *bp; + UVMHIST_FUNC("sw_reg_start"); UVMHIST_CALLED(pdhist); + + /* recursion control */ + if ((sdp->swd_flags & SWF_BUSY) != 0) + return; + + sdp->swd_flags |= SWF_BUSY; + + while (sdp->swd_tab.b_active < sdp->swd_maxactive) { + bp = sdp->swd_tab.b_actf; + if (bp == NULL) + break; + sdp->swd_tab.b_actf = bp->b_actf; + sdp->swd_tab.b_active++; + + UVMHIST_LOG(pdhist, + "sw_reg_start: bp %p vp %p blkno %p cnt %lx", + bp, bp->b_vp, bp->b_blkno, bp->b_bcount); + if ((bp->b_flags & B_READ) == 0) + bp->b_vp->v_numoutput++; + VOP_STRATEGY(bp); + } + sdp->swd_flags &= ~SWF_BUSY; +} + +/* + * sw_reg_iodone: one of our i/o's has completed and needs post-i/o cleanup + * + * => note that we can recover the vndbuf struct by casting the buf ptr + */ +static void +sw_reg_iodone(bp) + struct buf *bp; +{ + struct vndbuf *vbp = (struct vndbuf *) bp; + struct vndxfer *vnx = vbp->vb_xfer; + struct buf *pbp = vnx->vx_bp; /* parent buffer */ + struct swapdev *sdp = vnx->vx_sdp; + int s, resid; + UVMHIST_FUNC("sw_reg_iodone"); UVMHIST_CALLED(pdhist); + + UVMHIST_LOG(pdhist, " vbp=%p vp=%p blkno=%x addr=%p", + vbp, vbp->vb_buf.b_vp, vbp->vb_buf.b_blkno, vbp->vb_buf.b_data); + UVMHIST_LOG(pdhist, " cnt=%lx resid=%lx", + vbp->vb_buf.b_bcount, vbp->vb_buf.b_resid, 0, 0); + + /* + * protect vbp at splbio and update. 
+ */ + + s = splbio(); + resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid; + pbp->b_resid -= resid; + vnx->vx_pending--; + + if (vbp->vb_buf.b_error) { + UVMHIST_LOG(pdhist, " got error=%d !", + vbp->vb_buf.b_error, 0, 0, 0); + + /* pass error upward */ + vnx->vx_error = vbp->vb_buf.b_error; + } + + /* + * drop "hold" reference to vnode (if one) + * XXXCDC: always set to NULLVP, this is useless, right? + */ + if (vbp->vb_buf.b_vp != NULLVP) + brelvp(&vbp->vb_buf); + + /* + * kill vbp structure + */ + putvndbuf(vbp); + + /* + * wrap up this transaction if it has run to completion or, in + * case of an error, when all auxiliary buffers have returned. + */ + if (vnx->vx_error != 0) { + /* pass error upward */ + pbp->b_flags |= B_ERROR; + pbp->b_error = vnx->vx_error; + if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) { + putvndxfer(vnx); + biodone(pbp); + } + } else if (pbp->b_resid == 0) { +#ifdef DIAGNOSTIC + if (vnx->vx_pending != 0) + panic("sw_reg_iodone: vnx pending: %d",vnx->vx_pending); +#endif + + if ((vnx->vx_flags & VX_BUSY) == 0) { + UVMHIST_LOG(pdhist, " iodone error=%d !", + pbp, vnx->vx_error, 0, 0); + putvndxfer(vnx); + biodone(pbp); + } + } + + /* + * done! start next swapdev I/O if one is pending + */ + sdp->swd_tab.b_active--; + sw_reg_start(sdp); + + splx(s); +} +#endif /* SWAP_TO_FILES */ + + +/* + * uvm_swap_alloc: allocate space on swap + * + * => allocation is done "round robin" down the priority list, as we + * allocate in a priority we "rotate" the circle queue. + * => space can be freed with uvm_swap_free + * => we return the page slot number in /dev/drum (0 == invalid slot) + * => we lock swap_data_lock + * => XXXMRG: "LESSOK" INTERFACE NEEDED TO EXTENT SYSTEM + */ +int +uvm_swap_alloc(nslots, lessok) + int *nslots; /* IN/OUT */ + boolean_t lessok; +{ + struct swapdev *sdp; + struct swappri *spp; + u_long result; + UVMHIST_FUNC("uvm_swap_alloc"); UVMHIST_CALLED(pdhist); + + /* + * no swap devices configured yet? definite failure. + */ + if (uvmexp.nswapdev < 1) + return 0; + + /* + * lock data lock, convert slots into blocks, and enter loop + */ + simple_lock(&swap_data_lock); + +ReTry: /* XXXMRG */ + for (spp = swap_priority.lh_first; spp != NULL; + spp = spp->spi_swappri.le_next) { + for (sdp = spp->spi_swapdev.cqh_first; + sdp != (void *)&spp->spi_swapdev; + sdp = sdp->swd_next.cqe_next) { + /* if it's not enabled, then we can't swap from it */ + if ((sdp->swd_flags & SWF_ENABLE) == 0) + continue; + if (sdp->swd_npginuse + *nslots > sdp->swd_npages) + continue; + if (extent_alloc(sdp->swd_ex, *nslots, EX_NOALIGN, + EX_NOBOUNDARY, EX_MALLOCOK|EX_NOWAIT, + &result) != 0) { + continue; + } + + /* + * successful allocation! now rotate the circleq. + */ + CIRCLEQ_REMOVE(&spp->spi_swapdev, sdp, swd_next); + CIRCLEQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next); + sdp->swd_npginuse += *nslots; + uvmexp.swpginuse += *nslots; + simple_unlock(&swap_data_lock); + /* done! return drum slot number */ + UVMHIST_LOG(pdhist, + "success! 
returning %d slots starting at %d", + *nslots, result + sdp->swd_drumoffset, 0, 0); +#if 0 +{ + struct swapdev *sdp2; + + sdp2 = swapdrum_getsdp(result + sdp->swd_drumoffset); + if (sdp2 == NULL) { +printf("uvm_swap_alloc: nslots=%d, dev=%x, drumoff=%d, result=%ld", + *nslots, sdp->swd_dev, sdp->swd_drumoffset, result); +panic("uvm_swap_alloc: allocating unmapped swap block!"); + } +} +#endif + return(result + sdp->swd_drumoffset); + } + } + + /* XXXMRG: BEGIN HACK */ + if (*nslots > 1 && lessok) { + *nslots = 1; + goto ReTry; /* XXXMRG: ugh! extent should support this for us */ + } + /* XXXMRG: END HACK */ + + simple_unlock(&swap_data_lock); + return 0; /* failed */ +} + +/* + * uvm_swap_free: free swap slots + * + * => this can be all or part of an allocation made by uvm_swap_alloc + * => we lock swap_data_lock + */ +void +uvm_swap_free(startslot, nslots) + int startslot; + int nslots; +{ + struct swapdev *sdp; + UVMHIST_FUNC("uvm_swap_free"); UVMHIST_CALLED(pdhist); + + UVMHIST_LOG(pdhist, "freeing %d slots starting at %d", nslots, + startslot, 0, 0); + /* + * convert drum slot offset back to sdp, free the blocks + * in the extent, and return. must hold pri lock to do + * lookup and access the extent. + */ + simple_lock(&swap_data_lock); + sdp = swapdrum_getsdp(startslot); + +#ifdef DIAGNOSTIC + if (uvmexp.nswapdev < 1) + panic("uvm_swap_free: uvmexp.nswapdev < 1\n"); + if (sdp == NULL) { + printf("uvm_swap_free: startslot %d, nslots %d\n", startslot, + nslots); + panic("uvm_swap_free: unmapped address\n"); + } +#endif + if (extent_free(sdp->swd_ex, startslot - sdp->swd_drumoffset, nslots, + EX_MALLOCOK|EX_NOWAIT) != 0) + printf("warning: resource shortage: %d slots of swap lost\n", + nslots); + + sdp->swd_npginuse -= nslots; + uvmexp.swpginuse -= nslots; +#ifdef DIAGNOSTIC + if (sdp->swd_npginuse < 0) + panic("uvm_swap_free: inuse < 0"); +#endif + simple_unlock(&swap_data_lock); +} + +/* + * uvm_swap_put: put any number of pages into a contig place on swap + * + * => can be sync or async + * => XXXMRG: consider making it an inline or macro + */ +int +uvm_swap_put(swslot, ppsp, npages, flags) + int swslot; + struct vm_page **ppsp; + int npages; + int flags; +{ + int result; + +#if 0 + flags |= PGO_SYNCIO; /* XXXMRG: tmp, force sync */ +#endif + + result = uvm_swap_io(ppsp, swslot, npages, B_WRITE | + ((flags & PGO_SYNCIO) ? 0 : B_ASYNC)); + + return (result); +} + +/* + * uvm_swap_get: get a single page from swap + * + * => usually a sync op (from fault) + * => XXXMRG: consider making it an inline or macro + */ +int +uvm_swap_get(page, swslot, flags) + struct vm_page *page; + int swslot, flags; +{ + int result; + + uvmexp.nswget++; +#ifdef DIAGNOSTIC + if ((flags & PGO_SYNCIO) == 0) + printf("uvm_swap_get: ASYNC get requested?\n"); +#endif + + result = uvm_swap_io(&page, swslot, 1, B_READ | + ((flags & PGO_SYNCIO) ? 0 : B_ASYNC)); + + return (result); +} + +/* + * uvm_swap_io: do an i/o operation to swap + */ + +static int +uvm_swap_io(pps, startslot, npages, flags) + struct vm_page **pps; + int startslot, npages, flags; +{ + daddr_t startblk; + struct swapbuf *sbp; + struct buf *bp; + vaddr_t kva; + int result, s, waitf, pflag; + UVMHIST_FUNC("uvm_swap_io"); UVMHIST_CALLED(pdhist); + + UVMHIST_LOG(pdhist, "<- called, startslot=%d, npages=%d, flags=%d", + startslot, npages, flags, 0); + /* + * convert starting drum slot to block number + */ + startblk = btodb(startslot << PAGE_SHIFT); + + /* + * first, map the pages into the kernel (XXX: currently required + * by buffer system). 
note that we don't let pagermapin alloc + * an aiodesc structure because we don't want to chance a malloc. + * we've got our own pool of aiodesc structures (in swapbuf). + */ + waitf = (flags & B_ASYNC) ? M_NOWAIT : M_WAITOK; + kva = uvm_pagermapin(pps, npages, NULL, waitf); + if (kva == NULL) + return (VM_PAGER_AGAIN); + + /* + * now allocate a swap buffer off of freesbufs + * [make sure we don't put the pagedaemon to sleep...] + */ + s = splbio(); + pflag = ((flags & B_ASYNC) != 0 || curproc == uvm.pagedaemon_proc) + ? 0 + : PR_WAITOK; + sbp = pool_get(swapbuf_pool, pflag); + splx(s); /* drop splbio */ + + /* + * if we failed to get a swapbuf, return "try again" + */ + if (sbp == NULL) + return (VM_PAGER_AGAIN); + + /* + * fill in the bp/sbp. we currently route our i/o through + * /dev/drum's vnode [swapdev_vp]. + */ + bp = &sbp->sw_buf; + bp->b_flags = B_BUSY | B_NOCACHE | (flags & (B_READ|B_ASYNC)); + bp->b_proc = &proc0; /* XXX */ + bp->b_rcred = bp->b_wcred = proc0.p_ucred; + bp->b_vnbufs.le_next = NOLIST; + bp->b_data = (caddr_t)kva; + bp->b_blkno = startblk; + VHOLD(swapdev_vp); + bp->b_vp = swapdev_vp; + /* XXXCDC: isn't swapdev_vp always a VCHR? */ + /* XXXMRG: probably -- this is obviously something inherited... */ + if (swapdev_vp->v_type == VBLK) + bp->b_dev = swapdev_vp->v_rdev; + bp->b_bcount = npages << PAGE_SHIFT; + + /* + * for pageouts we must set "dirtyoff" [NFS client code needs it]. + * and we bump v_numoutput (counter of number of active outputs). + */ + if ((bp->b_flags & B_READ) == 0) { + bp->b_dirtyoff = 0; + bp->b_dirtyend = npages << PAGE_SHIFT; + s = splbio(); + swapdev_vp->v_numoutput++; + splx(s); + } + + /* + * for async ops we must set up the aiodesc and setup the callback + * XXX: we expect no async-reads, but we don't prevent it here. + */ + if (flags & B_ASYNC) { + sbp->sw_aio.aiodone = uvm_swap_aiodone; + sbp->sw_aio.kva = kva; + sbp->sw_aio.npages = npages; + sbp->sw_aio.pd_ptr = sbp; /* backpointer */ + bp->b_flags |= B_CALL; /* set callback */ + bp->b_iodone = uvm_swap_bufdone;/* "buf" iodone function */ + UVMHIST_LOG(pdhist, "doing async!", 0, 0, 0, 0); + } + UVMHIST_LOG(pdhist, + "about to start io: data = 0x%p blkno = 0x%x, bcount = %ld", + bp->b_data, bp->b_blkno, bp->b_bcount, 0); + + /* + * now we start the I/O, and if async, return. + */ + VOP_STRATEGY(bp); + if (flags & B_ASYNC) + return (VM_PAGER_PEND); + + /* + * must be sync i/o. wait for it to finish + */ + bp->b_error = biowait(bp); + result = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK; + + /* + * kill the pager mapping + */ + uvm_pagermapout(kva, npages); + + /* + * now dispose of the swap buffer + */ + s = splbio(); + bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY|B_NOCACHE); + if (bp->b_vp) + brelvp(bp); + + pool_put(swapbuf_pool, sbp); + splx(s); + + /* + * finally return. + */ + UVMHIST_LOG(pdhist, "<- done (sync) result=%d", result, 0, 0, 0); + return (result); +} + +/* + * uvm_swap_bufdone: called from the buffer system when the i/o is done + */ +static void +uvm_swap_bufdone(bp) + struct buf *bp; +{ + struct swapbuf *sbp = (struct swapbuf *) bp; + int s = splbio(); + UVMHIST_FUNC("uvm_swap_bufdone"); UVMHIST_CALLED(pdhist); + + UVMHIST_LOG(pdhist, "cleaning buf %p", buf, 0, 0, 0); +#ifdef DIAGNOSTIC + /* + * sanity check: swapbufs are private, so they shouldn't be wanted + */ + if (bp->b_flags & B_WANTED) + panic("uvm_swap_bufdone: private buf wanted"); +#endif + + /* + * drop buffers reference to the vnode and its flags. 
+ */ + bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY|B_NOCACHE); + if (bp->b_vp) + brelvp(bp); + + /* + * now put the aio on the uvm.aio_done list and wake the + * pagedaemon (which will finish up our job in its context). + */ + simple_lock(&uvm.pagedaemon_lock); /* locks uvm.aio_done */ + TAILQ_INSERT_TAIL(&uvm.aio_done, &sbp->sw_aio, aioq); + simple_unlock(&uvm.pagedaemon_lock); + + thread_wakeup(&uvm.pagedaemon); + splx(s); +} + +/* + * uvm_swap_aiodone: aiodone function for anonymous memory + * + * => this is called in the context of the pagedaemon (but with the + * page queues unlocked!) + * => our "aio" structure must be part of a "swapbuf" + */ +static void +uvm_swap_aiodone(aio) + struct uvm_aiodesc *aio; +{ + struct swapbuf *sbp = aio->pd_ptr; + struct vm_page *pps[MAXBSIZE >> PAGE_SHIFT]; + int lcv, s; + vaddr_t addr; + UVMHIST_FUNC("uvm_swap_aiodone"); UVMHIST_CALLED(pdhist); + + UVMHIST_LOG(pdhist, "done with aio %p", aio, 0, 0, 0); +#ifdef DIAGNOSTIC + /* + * sanity check + */ + if (aio->npages > (MAXBSIZE >> PAGE_SHIFT)) + panic("uvm_swap_aiodone: aio too big!"); +#endif + + /* + * first, we have to recover the page pointers (pps) by poking in the + * kernel pmap (XXX: should be saved in the buf structure). + */ + for (addr = aio->kva, lcv = 0 ; lcv < aio->npages ; + addr += PAGE_SIZE, lcv++) { + pps[lcv] = uvm_pageratop(addr); + } + + /* + * now we can dispose of the kernel mappings of the buffer + */ + uvm_pagermapout(aio->kva, aio->npages); + + /* + * now we can dispose of the pages by using the dropcluster function + * [note that we have no "page of interest" so we pass in null] + */ + uvm_pager_dropcluster(NULL, NULL, pps, &aio->npages, + PGO_PDFREECLUST, 0); + + /* + * finally, we can dispose of the swapbuf + */ + s = splbio(); + pool_put(swapbuf_pool, sbp); + splx(s); + + /* + * done! + */ +} diff --git a/sys/uvm/uvm_swap.h b/sys/uvm/uvm_swap.h new file mode 100644 index 00000000000..008db98b241 --- /dev/null +++ b/sys/uvm/uvm_swap.h @@ -0,0 +1,42 @@ +/* $NetBSD: uvm_swap.h,v 1.3 1998/02/07 11:09:48 mrg Exp $ */ + +/* + * Copyright (c) 1997 Matthew R. Green + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: Id: uvm_swap.h,v 1.1.2.6 1997/12/15 05:39:31 mrg Exp + */ + +#ifndef _UVM_UVM_SWAP_H_ +#define _UVM_UVM_SWAP_H_ + +int uvm_swap_get __P((struct vm_page *, int, int)); +int uvm_swap_put __P((int, struct vm_page **, int, + int)); +int uvm_swap_alloc __P((int *wanted, boolean_t lessok)); +void uvm_swap_free __P((int startslot, int nslots)); + +#endif /* _UVM_UVM_SWAP_H_ */ diff --git a/sys/uvm/uvm_unix.c b/sys/uvm/uvm_unix.c new file mode 100644 index 00000000000..ed1588491cc --- /dev/null +++ b/sys/uvm/uvm_unix.c @@ -0,0 +1,258 @@ +/* $NetBSD: uvm_unix.c,v 1.7 1998/10/11 23:18:21 chuck Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * Copyright (c) 1991, 1993 The Regents of the University of California. + * Copyright (c) 1988 University of Utah. + * + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor, + * Washington University, the University of California, Berkeley and + * its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * from: Utah $Hdr: vm_unix.c 1.1 89/11/07$ + * @(#)vm_unix.c 8.1 (Berkeley) 6/11/93 + * from: Id: uvm_unix.c,v 1.1.2.2 1997/08/25 18:52:30 chuck Exp + */ + +/* + * uvm_unix.c: traditional sbrk/grow interface to vm. + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/resourcevar.h> +#include <sys/vnode.h> +#include <sys/core.h> + +#include <sys/mount.h> +#include <sys/syscallargs.h> + +#include <vm/vm.h> +#include <uvm/uvm.h> + + +/* + * sys_obreak: set break + */ + +int +sys_obreak(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + struct sys_obreak_args /* { + syscallarg(char *) nsize; + } */ *uap = v; + register struct vmspace *vm = p->p_vmspace; + vaddr_t new, old; + int rv; + register int diff; + + old = (vaddr_t)vm->vm_daddr; + new = round_page(SCARG(uap, nsize)); + if ((int)(new - old) > p->p_rlimit[RLIMIT_DATA].rlim_cur) + return(ENOMEM); + + old = round_page(old + ctob(vm->vm_dsize)); + diff = new - old; + + /* + * grow or shrink? + */ + + if (diff > 0) { + + rv = uvm_map(&vm->vm_map, &old, diff, NULL, UVM_UNKNOWN_OFFSET, + UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY, + UVM_ADV_NORMAL, UVM_FLAG_AMAPPAD|UVM_FLAG_FIXED| + UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW)); + + if (rv != KERN_SUCCESS) { + uprintf("sbrk: grow failed, return = %d\n", rv); + return(ENOMEM); + } + vm->vm_dsize += btoc(diff); + + } else if (diff < 0) { + + diff = -diff; + rv = uvm_deallocate(&vm->vm_map, new, diff); + if (rv != KERN_SUCCESS) { + uprintf("sbrk: shrink failed, return = %d\n", rv); + return(ENOMEM); + } + vm->vm_dsize -= btoc(diff); + + } + return(0); +} + +/* + * uvm_grow: enlarge the "stack segment" to include sp. + */ + +int +uvm_grow(p, sp) + struct proc *p; + vaddr_t sp; +{ + register struct vmspace *vm = p->p_vmspace; + register int si; + + /* + * For user defined stacks (from sendsig). + */ + if (sp < (vaddr_t)vm->vm_maxsaddr) + return (0); + + /* + * For common case of already allocated (from trap). + */ + if (sp >= USRSTACK - ctob(vm->vm_ssize)) + return (1); + + /* + * Really need to check vs limit and increment stack size if ok. + */ + si = clrnd(btoc(USRSTACK-sp) - vm->vm_ssize); + if (vm->vm_ssize + si > btoc(p->p_rlimit[RLIMIT_STACK].rlim_cur)) + return (0); + vm->vm_ssize += si; + return (1); +} + +/* + * sys_oadvise: old advice system call + */ + +/* ARGSUSED */ +int +sys_ovadvise(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ +#if 0 + struct sys_ovadvise_args /* { + syscallarg(int) anom; + } */ *uap = v; +#endif + + return (EINVAL); +} + +/* + * uvm_coredump: dump core! 
+ */ + +int +uvm_coredump(p, vp, cred, chdr) + struct proc *p; + struct vnode *vp; + struct ucred *cred; + struct core *chdr; +{ + register struct vmspace *vm = p->p_vmspace; + register vm_map_t map = &vm->vm_map; + register vm_map_entry_t entry; + vaddr_t start, end; + struct coreseg cseg; + off_t offset; + int flag, error = 0; + + offset = chdr->c_hdrsize + chdr->c_seghdrsize + chdr->c_cpusize; + + for (entry = map->header.next; entry != &map->header; + entry = entry->next) { + + /* should never happen for a user process */ + if (UVM_ET_ISSUBMAP(entry)) { + panic("uvm_coredump: user process with submap?"); + } + + if (!(entry->protection & VM_PROT_WRITE)) + continue; + + start = entry->start; + end = entry->end; + + if (start >= VM_MAXUSER_ADDRESS) + continue; + + if (end > VM_MAXUSER_ADDRESS) + end = VM_MAXUSER_ADDRESS; + + if (start >= (vaddr_t)vm->vm_maxsaddr) { + flag = CORE_STACK; + start = trunc_page(USRSTACK - ctob(vm->vm_ssize)); + if (start >= end) + continue; + } else + flag = CORE_DATA; + + /* + * Set up a new core file segment. + */ + CORE_SETMAGIC(cseg, CORESEGMAGIC, CORE_GETMID(*chdr), flag); + cseg.c_addr = start; + cseg.c_size = end - start; + + error = vn_rdwr(UIO_WRITE, vp, + (caddr_t)&cseg, chdr->c_seghdrsize, + offset, UIO_SYSSPACE, + IO_NODELOCKED|IO_UNIT, cred, NULL, p); + if (error) + break; + + offset += chdr->c_seghdrsize; + error = vn_rdwr(UIO_WRITE, vp, + (caddr_t)cseg.c_addr, (int)cseg.c_size, + offset, UIO_USERSPACE, + IO_NODELOCKED|IO_UNIT, cred, NULL, p); + if (error) + break; + + offset += cseg.c_size; + chdr->c_nseg++; + } + + return (error); +} + diff --git a/sys/uvm/uvm_user.c b/sys/uvm/uvm_user.c new file mode 100644 index 00000000000..e3c328298b7 --- /dev/null +++ b/sys/uvm/uvm_user.c @@ -0,0 +1,72 @@ +/* $NetBSD: uvm_user.c,v 1.6 1998/10/11 23:18:21 chuck Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * from: Id: uvm_user.c,v 1.1.2.1 1997/08/14 19:10:41 chuck Exp + */ + +/* + * uvm_user.c: high level uvm_allocate/uvm_deallocate interface into vm. + */ + + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> + +#include <vm/vm.h> +#include <uvm/uvm.h> + +/* + * uvm_deallocate: deallocate memory (unmap) + */ + +int +uvm_deallocate(map, start, size) + vm_map_t map; + vaddr_t start; + vsize_t size; +{ + + if (map == NULL) + panic("uvm_deallocate with null map"); + + if (size == (vaddr_t) 0) + return (KERN_SUCCESS); + + return(uvm_unmap(map, trunc_page(start), round_page(start+size))); + +} diff --git a/sys/uvm/uvm_vnode.c b/sys/uvm/uvm_vnode.c new file mode 100644 index 00000000000..154c009b2d0 --- /dev/null +++ b/sys/uvm/uvm_vnode.c @@ -0,0 +1,2067 @@ +/* $NetBSD: uvm_vnode.c,v 1.18 1999/01/29 12:56:17 bouyer Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * Copyright (c) 1991, 1993 + * The Regents of the University of California. + * Copyright (c) 1990 University of Utah. + * + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor, + * Washington University, the University of California, Berkeley and + * its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)vnode_pager.c 8.8 (Berkeley) 2/13/94 + * from: Id: uvm_vnode.c,v 1.1.2.26 1998/02/02 20:38:07 chuck Exp + */ + +/* + * uvm_vnode.c: the vnode pager. + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/malloc.h> +#include <sys/vnode.h> +#include <sys/disklabel.h> +#include <sys/ioctl.h> +#include <sys/fcntl.h> +#include <sys/conf.h> + +#include <miscfs/specfs/specdev.h> + +#include <vm/vm.h> +#include <vm/vm_page.h> +#include <vm/vm_kern.h> + +#include <uvm/uvm.h> +#include <uvm/uvm_vnode.h> + +/* + * private global data structure + * + * we keep a list of writeable active vnode-backed VM objects for sync op. + * we keep a simpleq of vnodes that are currently being sync'd. + */ + +LIST_HEAD(uvn_list_struct, uvm_vnode); +static struct uvn_list_struct uvn_wlist; /* writeable uvns */ +static simple_lock_data_t uvn_wl_lock; /* locks uvn_wlist */ + +SIMPLEQ_HEAD(uvn_sq_struct, uvm_vnode); +static struct uvn_sq_struct uvn_sync_q; /* sync'ing uvns */ +lock_data_t uvn_sync_lock; /* locks sync operation */ + +/* + * functions + */ + +static int uvn_asyncget __P((struct uvm_object *, vaddr_t, + int)); +struct uvm_object *uvn_attach __P((void *, vm_prot_t)); +static void uvn_cluster __P((struct uvm_object *, vaddr_t, + vaddr_t *, vaddr_t *)); +static void uvn_detach __P((struct uvm_object *)); +static boolean_t uvn_flush __P((struct uvm_object *, vaddr_t, + vaddr_t, int)); +static int uvn_get __P((struct uvm_object *, vaddr_t, + vm_page_t *, int *, int, + vm_prot_t, int, int)); +static void uvn_init __P((void)); +static int uvn_io __P((struct uvm_vnode *, vm_page_t *, + int, int, int)); +static int uvn_put __P((struct uvm_object *, vm_page_t *, + int, boolean_t)); +static void uvn_reference __P((struct uvm_object *)); +static boolean_t uvn_releasepg __P((struct vm_page *, + struct vm_page **)); + +/* + * master pager structure + */ + +struct uvm_pagerops uvm_vnodeops = { + uvn_init, + uvn_attach, + uvn_reference, + uvn_detach, + NULL, /* no specialized fault routine required */ + uvn_flush, + uvn_get, + uvn_asyncget, + uvn_put, + uvn_cluster, + uvm_mk_pcluster, /* use generic version of this: see uvm_pager.c */ + uvm_shareprot, /* !NULL: allow us in share maps */ + NULL, /* AIO-DONE function (not until we have asyncio) */ + uvn_releasepg, +}; + +/* + * the ops! + */ + +/* + * uvn_init + * + * init pager private data structures. + */ + +static void +uvn_init() +{ + + LIST_INIT(&uvn_wlist); + simple_lock_init(&uvn_wl_lock); + /* note: uvn_sync_q init'd in uvm_vnp_sync() */ + lockinit(&uvn_sync_lock, PVM, "uvnsync", 0, 0); +} + +/* + * uvn_attach + * + * attach a vnode structure to a VM object. if the vnode is already + * attached, then just bump the reference count by one and return the + * VM object. if not already attached, attach and return the new VM obj. + * the "accessprot" tells the max access the attaching thread wants to + * our pages. + * + * => caller must _not_ already be holding the lock on the uvm_object. 
+ * => in fact, nothing should be locked so that we can sleep here. + * => note that uvm_object is first thing in vnode structure, so their + * pointers are equiv. + */ + +struct uvm_object * +uvn_attach(arg, accessprot) + void *arg; + vm_prot_t accessprot; +{ + struct vnode *vp = arg; + struct uvm_vnode *uvn = &vp->v_uvm; + struct vattr vattr; + int oldflags, result; + struct partinfo pi; + u_quad_t used_vnode_size; + UVMHIST_FUNC("uvn_attach"); UVMHIST_CALLED(maphist); + + UVMHIST_LOG(maphist, "(vn=0x%x)", arg,0,0,0); + + used_vnode_size = (u_quad_t)0; /* XXX gcc -Wuninitialized */ + + /* + * first get a lock on the uvn. + */ + simple_lock(&uvn->u_obj.vmobjlock); + while (uvn->u_flags & UVM_VNODE_BLOCKED) { + uvn->u_flags |= UVM_VNODE_WANTED; + UVMHIST_LOG(maphist, " SLEEPING on blocked vn",0,0,0,0); + UVM_UNLOCK_AND_WAIT(uvn, &uvn->u_obj.vmobjlock, FALSE, + "uvn_attach", 0); + simple_lock(&uvn->u_obj.vmobjlock); + UVMHIST_LOG(maphist," WOKE UP",0,0,0,0); + } + + /* + * if we're mapping a BLK device, make sure it is a disk. + */ + if (vp->v_type == VBLK && bdevsw[major(vp->v_rdev)].d_type != D_DISK) { + simple_unlock(&uvn->u_obj.vmobjlock); /* drop lock */ + UVMHIST_LOG(maphist,"<- done (VBLK not D_DISK!)", 0,0,0,0); + return(NULL); + } + + /* + * now we have lock and uvn must not be in a blocked state. + * first check to see if it is already active, in which case + * we can bump the reference count, check to see if we need to + * add it to the writeable list, and then return. + */ + if (uvn->u_flags & UVM_VNODE_VALID) { /* already active? */ + + /* regain VREF if we were persisting */ + if (uvn->u_obj.uo_refs == 0) { + VREF(vp); + UVMHIST_LOG(maphist," VREF (reclaim persisting vnode)", + 0,0,0,0); + } + uvn->u_obj.uo_refs++; /* bump uvn ref! */ + + /* check for new writeable uvn */ + if ((accessprot & VM_PROT_WRITE) != 0 && + (uvn->u_flags & UVM_VNODE_WRITEABLE) == 0) { + simple_lock(&uvn_wl_lock); + LIST_INSERT_HEAD(&uvn_wlist, uvn, u_wlist); + simple_unlock(&uvn_wl_lock); + /* we are now on wlist! */ + uvn->u_flags |= UVM_VNODE_WRITEABLE; + } + + /* unlock and return */ + simple_unlock(&uvn->u_obj.vmobjlock); + UVMHIST_LOG(maphist,"<- done, refcnt=%d", uvn->u_obj.uo_refs, + 0, 0, 0); + return (&uvn->u_obj); + } + + /* + * need to call VOP_GETATTR() to get the attributes, but that could + * block (due to I/O), so we want to unlock the object before calling. + * however, we want to keep anyone else from playing with the object + * while it is unlocked. to do this we set UVM_VNODE_ALOCK which + * prevents anyone from attaching to the vnode until we are done with + * it. + */ + uvn->u_flags = UVM_VNODE_ALOCK; + simple_unlock(&uvn->u_obj.vmobjlock); /* drop lock in case we sleep */ + /* XXX: curproc? */ + + if (vp->v_type == VBLK) { + /* + * We could implement this as a specfs getattr call, but: + * + * (1) VOP_GETATTR() would get the file system + * vnode operation, not the specfs operation. + * + * (2) All we want is the size, anyhow. 
+ */ + result = (*bdevsw[major(vp->v_rdev)].d_ioctl)(vp->v_rdev, + DIOCGPART, (caddr_t)&pi, FREAD, curproc); + if (result == 0) { + /* XXX should remember blocksize */ + used_vnode_size = (u_quad_t)pi.disklab->d_secsize * + (u_quad_t)pi.part->p_size; + } + } else { + result = VOP_GETATTR(vp, &vattr, curproc->p_ucred, curproc); + if (result == 0) + used_vnode_size = vattr.va_size; + } + + /* relock object */ + simple_lock(&uvn->u_obj.vmobjlock); + + if (result != 0) { + if (uvn->u_flags & UVM_VNODE_WANTED) + wakeup(uvn); + uvn->u_flags = 0; + simple_unlock(&uvn->u_obj.vmobjlock); /* drop lock */ + UVMHIST_LOG(maphist,"<- done (VOP_GETATTR FAILED!)", 0,0,0,0); + return(NULL); + } + + /* + * make sure that the newsize fits within a vaddr_t + * XXX: need to revise addressing data types + */ +#ifdef DEBUG + if (vp->v_type == VBLK) + printf("used_vnode_size = %qu\n", used_vnode_size); +#endif + if (used_vnode_size > (vaddr_t) -PAGE_SIZE) { +#ifdef DEBUG + printf("uvn_attach: vn %p size truncated %qx->%x\n", vp, + used_vnode_size, -PAGE_SIZE); +#endif + used_vnode_size = (vaddr_t) -PAGE_SIZE; + } + + /* + * now set up the uvn. + */ + uvn->u_obj.pgops = &uvm_vnodeops; + TAILQ_INIT(&uvn->u_obj.memq); + uvn->u_obj.uo_npages = 0; + uvn->u_obj.uo_refs = 1; /* just us... */ + oldflags = uvn->u_flags; + uvn->u_flags = UVM_VNODE_VALID|UVM_VNODE_CANPERSIST; + uvn->u_nio = 0; + uvn->u_size = used_vnode_size; + + /* if write access, we need to add it to the wlist */ + if (accessprot & VM_PROT_WRITE) { + simple_lock(&uvn_wl_lock); + LIST_INSERT_HEAD(&uvn_wlist, uvn, u_wlist); + simple_unlock(&uvn_wl_lock); + uvn->u_flags |= UVM_VNODE_WRITEABLE; /* we are on wlist! */ + } + + /* + * add a reference to the vnode. this reference will stay as long + * as there is a valid mapping of the vnode. dropped when the + * reference count goes to zero [and we either free or persist]. + */ + VREF(vp); + simple_unlock(&uvn->u_obj.vmobjlock); + if (oldflags & UVM_VNODE_WANTED) + wakeup(uvn); + + UVMHIST_LOG(maphist,"<- done/VREF, ret 0x%x", &uvn->u_obj,0,0,0); + return(&uvn->u_obj); +} + + +/* + * uvn_reference + * + * duplicate a reference to a VM object. Note that the reference + * count must already be at least one (the passed in reference) so + * there is no chance of the uvn being killed or locked out here. + * + * => caller must call with object unlocked. + * => caller must be using the same accessprot as was used at attach time + */ + + +static void +uvn_reference(uobj) + struct uvm_object *uobj; +{ +#ifdef DIAGNOSTIC + struct uvm_vnode *uvn = (struct uvm_vnode *) uobj; +#endif + UVMHIST_FUNC("uvn_reference"); UVMHIST_CALLED(maphist); + + simple_lock(&uobj->vmobjlock); +#ifdef DIAGNOSTIC + if ((uvn->u_flags & UVM_VNODE_VALID) == 0) { + printf("uvn_reference: ref=%d, flags=0x%x\n", uvn->u_flags, + uobj->uo_refs); + panic("uvn_reference: invalid state"); + } +#endif + uobj->uo_refs++; + UVMHIST_LOG(maphist, "<- done (uobj=0x%x, ref = %d)", + uobj, uobj->uo_refs,0,0); + simple_unlock(&uobj->vmobjlock); +} + +/* + * uvn_detach + * + * remove a reference to a VM object. + * + * => caller must call with object unlocked and map locked. + * => this starts the detach process, but doesn't have to finish it + * (async i/o could still be pending). 
+ */ +static void +uvn_detach(uobj) + struct uvm_object *uobj; +{ + struct uvm_vnode *uvn; + struct vnode *vp; + int oldflags; + UVMHIST_FUNC("uvn_detach"); UVMHIST_CALLED(maphist); + + simple_lock(&uobj->vmobjlock); + + UVMHIST_LOG(maphist," (uobj=0x%x) ref=%d", uobj,uobj->uo_refs,0,0); + uobj->uo_refs--; /* drop ref! */ + if (uobj->uo_refs) { /* still more refs */ + simple_unlock(&uobj->vmobjlock); + UVMHIST_LOG(maphist, "<- done (rc>0)", 0,0,0,0); + return; + } + + /* + * get other pointers ... + */ + + uvn = (struct uvm_vnode *) uobj; + vp = (struct vnode *) uobj; + + /* + * clear VTEXT flag now that there are no mappings left (VTEXT is used + * to keep an active text file from being overwritten). + */ + vp->v_flag &= ~VTEXT; + + /* + * we just dropped the last reference to the uvn. see if we can + * let it "stick around". + */ + + if (uvn->u_flags & UVM_VNODE_CANPERSIST) { + /* won't block */ + uvn_flush(uobj, 0, 0, PGO_DEACTIVATE|PGO_ALLPAGES); + simple_unlock(&uobj->vmobjlock); + vrele(vp); /* drop vnode reference */ + UVMHIST_LOG(maphist,"<- done/vrele! (persist)", 0,0,0,0); + return; + } + + /* + * its a goner! + */ + + UVMHIST_LOG(maphist," its a goner (flushing)!", 0,0,0,0); + + uvn->u_flags |= UVM_VNODE_DYING; + + /* + * even though we may unlock in flush, no one can gain a reference + * to us until we clear the "dying" flag [because it blocks + * attaches]. we will not do that until after we've disposed of all + * the pages with uvn_flush(). note that before the flush the only + * pages that could be marked PG_BUSY are ones that are in async + * pageout by the daemon. (there can't be any pending "get"'s + * because there are no references to the object). + */ + + (void) uvn_flush(uobj, 0, 0, PGO_CLEANIT|PGO_FREE|PGO_ALLPAGES); + + UVMHIST_LOG(maphist," its a goner (done flush)!", 0,0,0,0); + + /* + * given the structure of this pager, the above flush request will + * create the following state: all the pages that were in the object + * have either been free'd or they are marked PG_BUSY|PG_RELEASED. + * the PG_BUSY bit was set either by us or the daemon for async I/O. + * in either case, if we have pages left we can't kill the object + * yet because i/o is pending. in this case we set the "relkill" + * flag which will cause pgo_releasepg to kill the object once all + * the I/O's are done [pgo_releasepg will be called from the aiodone + * routine or from the page daemon]. + */ + + if (uobj->uo_npages) { /* I/O pending. iodone will free */ +#ifdef DIAGNOSTIC + /* + * XXXCDC: very unlikely to happen until we have async i/o + * so print a little info message in case it does. + */ + printf("uvn_detach: vn %p has pages left after flush - " + "relkill mode\n", uobj); +#endif + uvn->u_flags |= UVM_VNODE_RELKILL; + simple_unlock(&uobj->vmobjlock); + UVMHIST_LOG(maphist,"<- done! (releasepg will kill obj)", 0, 0, + 0, 0); + return; + } + + /* + * kill object now. note that we can't be on the sync q because + * all references are gone. + */ + if (uvn->u_flags & UVM_VNODE_WRITEABLE) { + simple_lock(&uvn_wl_lock); /* protect uvn_wlist */ + LIST_REMOVE(uvn, u_wlist); + simple_unlock(&uvn_wl_lock); + } +#ifdef DIAGNOSTIC + if (uobj->memq.tqh_first != NULL) + panic("uvn_deref: vnode VM object still has pages afer " + "syncio/free flush"); +#endif + oldflags = uvn->u_flags; + uvn->u_flags = 0; + simple_unlock(&uobj->vmobjlock); + + /* wake up any sleepers */ + if (oldflags & UVM_VNODE_WANTED) + wakeup(uvn); + + /* + * drop our reference to the vnode. 
+ */ + vrele(vp); + UVMHIST_LOG(maphist,"<- done (vrele) final", 0,0,0,0); + + return; +} + +/* + * uvm_vnp_terminate: external hook to clear out a vnode's VM + * + * called in two cases: + * [1] when a persisting vnode vm object (i.e. one with a zero reference + * count) needs to be freed so that a vnode can be reused. this + * happens under "getnewvnode" in vfs_subr.c. if the vnode from + * the free list is still attached (i.e. not VBAD) then vgone is + * called. as part of the vgone trace this should get called to + * free the vm object. this is the common case. + * [2] when a filesystem is being unmounted by force (MNT_FORCE, + * "umount -f") the vgone() function is called on active vnodes + * on the mounted file systems to kill their data (the vnodes become + * "dead" ones [see src/sys/miscfs/deadfs/...]). that results in a + * call here (even if the uvn is still in use -- i.e. has a non-zero + * reference count). this case happens at "umount -f" and during a + * "reboot/halt" operation. + * + * => the caller must XLOCK and VOP_LOCK the vnode before calling us + * [protects us from getting a vnode that is already in the DYING + * state...] + * => unlike uvn_detach, this function must not return until all the + * uvn's pages are disposed of. + * => in case [2] the uvn is still alive after this call, but all I/O + * ops will fail (due to the backing vnode now being "dead"). this + * will prob. kill any process using the uvn due to pgo_get failing. + */ + +void +uvm_vnp_terminate(vp) + struct vnode *vp; +{ + struct uvm_vnode *uvn = &vp->v_uvm; + int oldflags; + UVMHIST_FUNC("uvm_vnp_terminate"); UVMHIST_CALLED(maphist); + + /* + * lock object and check if it is valid + */ + simple_lock(&uvn->u_obj.vmobjlock); + UVMHIST_LOG(maphist, " vp=0x%x, ref=%d, flag=0x%x", vp, + uvn->u_obj.uo_refs, uvn->u_flags, 0); + if ((uvn->u_flags & UVM_VNODE_VALID) == 0) { + simple_unlock(&uvn->u_obj.vmobjlock); + UVMHIST_LOG(maphist, "<- done (not active)", 0, 0, 0, 0); + return; + } + + /* + * must be a valid uvn that is not already dying (because XLOCK + * protects us from that). the uvn can't in the the ALOCK state + * because it is valid, and uvn's that are in the ALOCK state haven't + * been marked valid yet. + */ + +#ifdef DEBUG + /* + * debug check: are we yanking the vnode out from under our uvn? + */ + if (uvn->u_obj.uo_refs) { + printf("uvm_vnp_terminate(%p): terminating active vnode " + "(refs=%d)\n", uvn, uvn->u_obj.uo_refs); + } +#endif + + /* + * it is possible that the uvn was detached and is in the relkill + * state [i.e. waiting for async i/o to finish so that releasepg can + * kill object]. we take over the vnode now and cancel the relkill. + * we want to know when the i/o is done so we can recycle right + * away. note that a uvn can only be in the RELKILL state if it + * has a zero reference count. + */ + + if (uvn->u_flags & UVM_VNODE_RELKILL) + uvn->u_flags &= ~UVM_VNODE_RELKILL; /* cancel RELKILL */ + + /* + * block the uvn by setting the dying flag, and then flush the + * pages. (note that flush may unlock object while doing I/O, but + * it will re-lock it before it returns control here). + * + * also, note that we tell I/O that we are already VOP_LOCK'd so + * that uvn_io doesn't attempt to VOP_LOCK again. + * + * XXXCDC: setting VNISLOCKED on an active uvn which is being terminated + * due to a forceful unmount might not be a good idea. maybe we + * need a way to pass in this info to uvn_flush through a + * pager-defined PGO_ constant [currently there are none]. 
+ */ + uvn->u_flags |= UVM_VNODE_DYING|UVM_VNODE_VNISLOCKED; + + (void) uvn_flush(&uvn->u_obj, 0, 0, PGO_CLEANIT|PGO_FREE|PGO_ALLPAGES); + + /* + * as we just did a flush we expect all the pages to be gone or in + * the process of going. sleep to wait for the rest to go [via iosync]. + */ + + while (uvn->u_obj.uo_npages) { +#ifdef DIAGNOSTIC + struct vm_page *pp; + for (pp = uvn->u_obj.memq.tqh_first ; pp != NULL ; + pp = pp->listq.tqe_next) { + if ((pp->flags & PG_BUSY) == 0) + panic("uvm_vnp_terminate: detected unbusy pg"); + } + if (uvn->u_nio == 0) + panic("uvm_vnp_terminate: no I/O to wait for?"); + printf("uvm_vnp_terminate: waiting for I/O to fin.\n"); + /* + * XXXCDC: this is unlikely to happen without async i/o so we + * put a printf in just to keep an eye on it. + */ +#endif + uvn->u_flags |= UVM_VNODE_IOSYNC; + UVM_UNLOCK_AND_WAIT(&uvn->u_nio, &uvn->u_obj.vmobjlock, FALSE, + "uvn_term",0); + simple_lock(&uvn->u_obj.vmobjlock); + } + + /* + * done. now we free the uvn if its reference count is zero + * (true if we are zapping a persisting uvn). however, if we are + * terminating a uvn with active mappings we let it live ... future + * calls down to the vnode layer will fail. + */ + + oldflags = uvn->u_flags; + if (uvn->u_obj.uo_refs) { + + /* + * uvn must live on it is dead-vnode state until all references + * are gone. restore flags. clear CANPERSIST state. + */ + + uvn->u_flags &= ~(UVM_VNODE_DYING|UVM_VNODE_VNISLOCKED| + UVM_VNODE_WANTED|UVM_VNODE_CANPERSIST); + + } else { + + /* + * free the uvn now. note that the VREF reference is already + * gone [it is dropped when we enter the persist state]. + */ + if (uvn->u_flags & UVM_VNODE_IOSYNCWANTED) + panic("uvm_vnp_terminate: io sync wanted bit set"); + + if (uvn->u_flags & UVM_VNODE_WRITEABLE) { + simple_lock(&uvn_wl_lock); + LIST_REMOVE(uvn, u_wlist); + simple_unlock(&uvn_wl_lock); + } + uvn->u_flags = 0; /* uvn is history, clear all bits */ + } + + if (oldflags & UVM_VNODE_WANTED) + wakeup(uvn); /* object lock still held */ + + simple_unlock(&uvn->u_obj.vmobjlock); + UVMHIST_LOG(maphist, "<- done", 0, 0, 0, 0); + +} + +/* + * uvn_releasepg: handled a released page in a uvn + * + * => "pg" is a PG_BUSY [caller owns it], PG_RELEASED page that we need + * to dispose of. + * => caller must handled PG_WANTED case + * => called with page's object locked, pageq's unlocked + * => returns TRUE if page's object is still alive, FALSE if we + * killed the page's object. if we return TRUE, then we + * return with the object locked. + * => if (nextpgp != NULL) => we return pageq.tqe_next here, and return + * with the page queues locked [for pagedaemon] + * => if (nextpgp == NULL) => we return with page queues unlocked [normal case] + * => we kill the uvn if it is not referenced and we are suppose to + * kill it ("relkill"). 
+ */ + +boolean_t +uvn_releasepg(pg, nextpgp) + struct vm_page *pg; + struct vm_page **nextpgp; /* OUT */ +{ + struct uvm_vnode *uvn = (struct uvm_vnode *) pg->uobject; +#ifdef DIAGNOSTIC + if ((pg->flags & PG_RELEASED) == 0) + panic("uvn_releasepg: page not released!"); +#endif + + /* + * dispose of the page [caller handles PG_WANTED] + */ + pmap_page_protect(PMAP_PGARG(pg), VM_PROT_NONE); + uvm_lock_pageq(); + if (nextpgp) + *nextpgp = pg->pageq.tqe_next; /* next page for daemon */ + uvm_pagefree(pg); + if (!nextpgp) + uvm_unlock_pageq(); + + /* + * now see if we need to kill the object + */ + if (uvn->u_flags & UVM_VNODE_RELKILL) { + if (uvn->u_obj.uo_refs) + panic("uvn_releasepg: kill flag set on referenced " + "object!"); + if (uvn->u_obj.uo_npages == 0) { + if (uvn->u_flags & UVM_VNODE_WRITEABLE) { + simple_lock(&uvn_wl_lock); + LIST_REMOVE(uvn, u_wlist); + simple_unlock(&uvn_wl_lock); + } +#ifdef DIAGNOSTIC + if (uvn->u_obj.memq.tqh_first) + panic("uvn_releasepg: pages in object with npages == 0"); +#endif + if (uvn->u_flags & UVM_VNODE_WANTED) + /* still holding object lock */ + wakeup(uvn); + + uvn->u_flags = 0; /* DEAD! */ + simple_unlock(&uvn->u_obj.vmobjlock); + return (FALSE); + } + } + return (TRUE); +} + +/* + * NOTE: currently we have to use VOP_READ/VOP_WRITE because they go + * through the buffer cache and allow I/O in any size. These VOPs use + * synchronous i/o. [vs. VOP_STRATEGY which can be async, but doesn't + * go through the buffer cache or allow I/O sizes larger than a + * block]. we will eventually want to change this. + * + * issues to consider: + * uvm provides the uvm_aiodesc structure for async i/o management. + * there are two tailq's in the uvm. structure... one for pending async + * i/o and one for "done" async i/o. to do an async i/o one puts + * an aiodesc on the "pending" list (protected by splbio()), starts the + * i/o and returns VM_PAGER_PEND. when the i/o is done, we expect + * some sort of "i/o done" function to be called (at splbio(), interrupt + * time). this function should remove the aiodesc from the pending list + * and place it on the "done" list and wakeup the daemon. the daemon + * will run at normal spl() and will remove all items from the "done" + * list and call the "aiodone" hook for each done request (see uvm_pager.c). + * [in the old vm code, this was done by calling the "put" routine with + * null arguments which made the code harder to read and understand because + * you had one function ("put") doing two things.] + * + * so the current pager needs: + * int uvn_aiodone(struct uvm_aiodesc *) + * + * => return KERN_SUCCESS (aio finished, free it). otherwise requeue for + * later collection. + * => called with pageq's locked by the daemon. + * + * general outline: + * - "try" to lock object. if fail, just return (will try again later) + * - drop "u_nio" (this req is done!) + * - if (object->iosync && u_naio == 0) { wakeup &uvn->u_naio } + * - get "page" structures (atop?). + * - handle "wanted" pages + * - handle "released" pages [using pgo_releasepg] + * >>> pgo_releasepg may kill the object + * dont forget to look at "object" wanted flag in all cases. + */ + + +/* + * uvn_flush: flush pages out of a uvm object. + * + * => object should be locked by caller. we may _unlock_ the object + * if (and only if) we need to clean a page (PGO_CLEANIT). + * we return with the object locked. + * => if PGO_CLEANIT is set, we may block (due to I/O). thus, a caller + * might want to unlock higher level resources (e.g. 
vm_map) + * before calling flush. + * => if PGO_CLEANIT is not set, then we will neither unlock the object + * or block. + * => if PGO_ALLPAGE is set, then all pages in the object are valid targets + * for flushing. + * => NOTE: we rely on the fact that the object's memq is a TAILQ and + * that new pages are inserted on the tail end of the list. thus, + * we can make a complete pass through the object in one go by starting + * at the head and working towards the tail (new pages are put in + * front of us). + * => NOTE: we are allowed to lock the page queues, so the caller + * must not be holding the lock on them [e.g. pagedaemon had + * better not call us with the queues locked] + * => we return TRUE unless we encountered some sort of I/O error + * + * comment on "cleaning" object and PG_BUSY pages: + * this routine is holding the lock on the object. the only time + * that it can run into a PG_BUSY page that it does not own is if + * some other process has started I/O on the page (e.g. either + * a pagein, or a pageout). if the PG_BUSY page is being paged + * in, then it can not be dirty (!PG_CLEAN) because no one has + * had a chance to modify it yet. if the PG_BUSY page is being + * paged out then it means that someone else has already started + * cleaning the page for us (how nice!). in this case, if we + * have syncio specified, then after we make our pass through the + * object we need to wait for the other PG_BUSY pages to clear + * off (i.e. we need to do an iosync). also note that once a + * page is PG_BUSY it must stay in its object until it is un-busyed. + * + * note on page traversal: + * we can traverse the pages in an object either by going down the + * linked list in "uobj->memq", or we can go over the address range + * by page doing hash table lookups for each address. depending + * on how many pages are in the object it may be cheaper to do one + * or the other. we set "by_list" to true if we are using memq. + * if the cost of a hash lookup was equal to the cost of the list + * traversal we could compare the number of pages in the start->stop + * range to the total number of pages in the object. however, it + * seems that a hash table lookup is more expensive than the linked + * list traversal, so we multiply the number of pages in the + * start->stop range by a penalty which we define below. 
+ */ + +#define UVN_HASH_PENALTY 4 /* XXX: a guess */ + +static boolean_t +uvn_flush(uobj, start, stop, flags) + struct uvm_object *uobj; + vaddr_t start, stop; + int flags; +{ + struct uvm_vnode *uvn = (struct uvm_vnode *) uobj; + struct vm_page *pp, *ppnext, *ptmp; + struct vm_page *pps[MAXBSIZE >> PAGE_SHIFT], **ppsp; + int npages, result, lcv; + boolean_t retval, need_iosync, by_list, needs_clean; + vaddr_t curoff; + u_short pp_version; + UVMHIST_FUNC("uvn_flush"); UVMHIST_CALLED(maphist); + + curoff = 0; /* XXX: shut up gcc */ + /* + * get init vals and determine how we are going to traverse object + */ + + need_iosync = FALSE; + retval = TRUE; /* return value */ + if (flags & PGO_ALLPAGES) { + start = 0; + stop = round_page(uvn->u_size); + by_list = TRUE; /* always go by the list */ + } else { + start = trunc_page(start); + stop = round_page(stop); + if (stop > round_page(uvn->u_size)) + printf("uvn_flush: strange, got an out of range " + "flush (fixed)\n"); + + by_list = (uobj->uo_npages <= + ((stop - start) >> PAGE_SHIFT) * UVN_HASH_PENALTY); + } + + UVMHIST_LOG(maphist, + " flush start=0x%x, stop=0x%x, by_list=%d, flags=0x%x", + start, stop, by_list, flags); + + /* + * PG_CLEANCHK: this bit is used by the pgo_mk_pcluster function as + * a _hint_ as to how up to date the PG_CLEAN bit is. if the hint + * is wrong it will only prevent us from clustering... it won't break + * anything. we clear all PG_CLEANCHK bits here, and pgo_mk_pcluster + * will set them as it syncs PG_CLEAN. This is only an issue if we + * are looking at non-inactive pages (because inactive page's PG_CLEAN + * bit is always up to date since there are no mappings). + * [borrowed PG_CLEANCHK idea from FreeBSD VM] + */ + + if ((flags & PGO_CLEANIT) != 0 && + uobj->pgops->pgo_mk_pcluster != NULL) { + if (by_list) { + for (pp = uobj->memq.tqh_first ; pp != NULL ; + pp = pp->listq.tqe_next) { + if (pp->offset < start || pp->offset >= stop) + continue; + pp->flags &= ~PG_CLEANCHK; + } + + } else { /* by hash */ + for (curoff = start ; curoff < stop; + curoff += PAGE_SIZE) { + pp = uvm_pagelookup(uobj, curoff); + if (pp) + pp->flags &= ~PG_CLEANCHK; + } + } + } + + /* + * now do it. note: we must update ppnext in body of loop or we + * will get stuck. we need to use ppnext because we may free "pp" + * before doing the next loop. + */ + + if (by_list) { + pp = uobj->memq.tqh_first; + } else { + curoff = start; + pp = uvm_pagelookup(uobj, curoff); + } + + ppnext = NULL; /* XXX: shut up gcc */ + ppsp = NULL; /* XXX: shut up gcc */ + uvm_lock_pageq(); /* page queues locked */ + + /* locked: both page queues and uobj */ + for ( ; (by_list && pp != NULL) || + (!by_list && curoff < stop) ; pp = ppnext) { + + if (by_list) { + + /* + * range check + */ + + if (pp->offset < start || pp->offset >= stop) { + ppnext = pp->listq.tqe_next; + continue; + } + + } else { + + /* + * null check + */ + + curoff += PAGE_SIZE; + if (pp == NULL) { + if (curoff < stop) + ppnext = uvm_pagelookup(uobj, curoff); + continue; + } + + } + + /* + * handle case where we do not need to clean page (either + * because we are not clean or because page is not dirty or + * is busy): + * + * NOTE: we are allowed to deactivate a non-wired active + * PG_BUSY page, but once a PG_BUSY page is on the inactive + * queue it must stay put until it is !PG_BUSY (so as not to + * confuse pagedaemon). 
+ */ + + if ((flags & PGO_CLEANIT) == 0 || (pp->flags & PG_BUSY) != 0) { + needs_clean = FALSE; + if ((pp->flags & PG_BUSY) != 0 && + (flags & (PGO_CLEANIT|PGO_SYNCIO)) == + (PGO_CLEANIT|PGO_SYNCIO)) + need_iosync = TRUE; + } else { + /* + * freeing: nuke all mappings so we can sync + * PG_CLEAN bit with no race + */ + if ((pp->flags & PG_CLEAN) != 0 && + (flags & PGO_FREE) != 0 && + (pp->pqflags & PQ_ACTIVE) != 0) + pmap_page_protect(PMAP_PGARG(pp), VM_PROT_NONE); + if ((pp->flags & PG_CLEAN) != 0 && + pmap_is_modified(PMAP_PGARG(pp))) + pp->flags &= ~(PG_CLEAN); + pp->flags |= PG_CLEANCHK; /* update "hint" */ + + needs_clean = ((pp->flags & PG_CLEAN) == 0); + } + + /* + * if we don't need a clean... load ppnext and dispose of pp + */ + if (!needs_clean) { + /* load ppnext */ + if (by_list) + ppnext = pp->listq.tqe_next; + else { + if (curoff < stop) + ppnext = uvm_pagelookup(uobj, curoff); + } + + /* now dispose of pp */ + if (flags & PGO_DEACTIVATE) { + if ((pp->pqflags & PQ_INACTIVE) == 0 && + pp->wire_count == 0) { + pmap_page_protect(PMAP_PGARG(pp), + VM_PROT_NONE); + uvm_pagedeactivate(pp); + } + + } else if (flags & PGO_FREE) { + if (pp->flags & PG_BUSY) { + /* release busy pages */ + pp->flags |= PG_RELEASED; + } else { + pmap_page_protect(PMAP_PGARG(pp), + VM_PROT_NONE); + /* removed page from object */ + uvm_pagefree(pp); + } + } + /* ppnext is valid so we can continue... */ + continue; + } + + /* + * pp points to a page in the locked object that we are + * working on. if it is !PG_CLEAN,!PG_BUSY and we asked + * for cleaning (PGO_CLEANIT). we clean it now. + * + * let uvm_pager_put attempted a clustered page out. + * note: locked: uobj and page queues. + */ + + pp->flags |= PG_BUSY; /* we 'own' page now */ + UVM_PAGE_OWN(pp, "uvn_flush"); + pmap_page_protect(PMAP_PGARG(pp), VM_PROT_READ); + pp_version = pp->version; +ReTry: + ppsp = pps; + npages = sizeof(pps) / sizeof(struct vm_page *); + + /* locked: page queues, uobj */ + result = uvm_pager_put(uobj, pp, &ppsp, &npages, + flags | PGO_DOACTCLUST, start, stop); + /* unlocked: page queues, uobj */ + + /* + * at this point nothing is locked. if we did an async I/O + * it is remotely possible for the async i/o to complete and + * the page "pp" be freed or what not before we get a chance + * to relock the object. in order to detect this, we have + * saved the version number of the page in "pp_version". + */ + + /* relock! */ + simple_lock(&uobj->vmobjlock); + uvm_lock_pageq(); + + /* + * VM_PAGER_AGAIN: given the structure of this pager, this + * can only happen when we are doing async I/O and can't + * map the pages into kernel memory (pager_map) due to lack + * of vm space. if this happens we drop back to sync I/O. + */ + + if (result == VM_PAGER_AGAIN) { + /* + * it is unlikely, but page could have been released + * while we had the object lock dropped. we ignore + * this now and retry the I/O. we will detect and + * handle the released page after the syncio I/O + * completes. + */ +#ifdef DIAGNOSTIC + if (flags & PGO_SYNCIO) + panic("uvn_flush: PGO_SYNCIO return 'try again' error (impossible)"); +#endif + flags |= PGO_SYNCIO; + goto ReTry; + } + + /* + * the cleaning operation is now done. finish up. note that + * on error (!OK, !PEND) uvm_pager_put drops the cluster for us. + * if success (OK, PEND) then uvm_pager_put returns the cluster + * to us in ppsp/npages. + */ + + /* + * for pending async i/o if we are not deactivating/freeing + * we can move on to the next page. 
+ */ + + if (result == VM_PAGER_PEND) { + + if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) { + /* + * no per-page ops: refresh ppnext and continue + */ + if (by_list) { + if (pp->version == pp_version) + ppnext = pp->listq.tqe_next; + else + /* reset */ + ppnext = uobj->memq.tqh_first; + } else { + if (curoff < stop) + ppnext = uvm_pagelookup(uobj, + curoff); + } + continue; + } + + /* need to do anything here? */ + } + + /* + * need to look at each page of the I/O operation. we defer + * processing "pp" until the last trip through this "for" loop + * so that we can load "ppnext" for the main loop after we + * play with the cluster pages [thus the "npages + 1" in the + * loop below]. + */ + + for (lcv = 0 ; lcv < npages + 1 ; lcv++) { + + /* + * handle ppnext for outside loop, and saving pp + * until the end. + */ + if (lcv < npages) { + if (ppsp[lcv] == pp) + continue; /* skip pp until the end */ + ptmp = ppsp[lcv]; + } else { + ptmp = pp; + + /* set up next page for outer loop */ + if (by_list) { + if (pp->version == pp_version) + ppnext = pp->listq.tqe_next; + else + /* reset */ + ppnext = uobj->memq.tqh_first; + } else { + if (curoff < stop) + ppnext = uvm_pagelookup(uobj, curoff); + } + } + + /* + * verify the page didn't get moved while obj was + * unlocked + */ + if (result == VM_PAGER_PEND && ptmp->uobject != uobj) + continue; + + /* + * unbusy the page if I/O is done. note that for + * pending I/O it is possible that the I/O op + * finished before we relocked the object (in + * which case the page is no longer busy). + */ + + if (result != VM_PAGER_PEND) { + if (ptmp->flags & PG_WANTED) + /* still holding object lock */ + thread_wakeup(ptmp); + + ptmp->flags &= ~(PG_WANTED|PG_BUSY); + UVM_PAGE_OWN(ptmp, NULL); + if (ptmp->flags & PG_RELEASED) { + + /* pgo_releasepg wants this */ + uvm_unlock_pageq(); + if (!uvn_releasepg(ptmp, NULL)) + return (TRUE); + + uvm_lock_pageq(); /* relock */ + continue; /* next page */ + + } else { + ptmp->flags |= (PG_CLEAN|PG_CLEANCHK); + if ((flags & PGO_FREE) == 0) + pmap_clear_modify( + PMAP_PGARG(ptmp)); + } + } + + /* + * dispose of page + */ + + if (flags & PGO_DEACTIVATE) { + if ((pp->pqflags & PQ_INACTIVE) == 0 && + pp->wire_count == 0) { + pmap_page_protect(PMAP_PGARG(ptmp), + VM_PROT_NONE); + uvm_pagedeactivate(ptmp); + } + + } else if (flags & PGO_FREE) { + if (result == VM_PAGER_PEND) { + if ((ptmp->flags & PG_BUSY) != 0) + /* signal for i/o done */ + ptmp->flags |= PG_RELEASED; + } else { + if (result != VM_PAGER_OK) { + printf("uvn_flush: obj=%p, " + "offset=0x%lx. error " + "during pageout.\n", + pp->uobject, pp->offset); + printf("uvn_flush: WARNING: " + "changes to page may be " + "lost!\n"); + retval = FALSE; + } + pmap_page_protect(PMAP_PGARG(ptmp), + VM_PROT_NONE); + uvm_pagefree(ptmp); + } + } + + } /* end of "lcv" for loop */ + + } /* end of "pp" for loop */ + + /* + * done with pagequeues: unlock + */ + uvm_unlock_pageq(); + + /* + * now wait for all I/O if required. + */ + if (need_iosync) { + + UVMHIST_LOG(maphist," <<DOING IOSYNC>>",0,0,0,0); + while (uvn->u_nio != 0) { + uvn->u_flags |= UVM_VNODE_IOSYNC; + UVM_UNLOCK_AND_WAIT(&uvn->u_nio, &uvn->u_obj.vmobjlock, + FALSE, "uvn_flush",0); + simple_lock(&uvn->u_obj.vmobjlock); + } + if (uvn->u_flags & UVM_VNODE_IOSYNCWANTED) + wakeup(&uvn->u_flags); + uvn->u_flags &= ~(UVM_VNODE_IOSYNC|UVM_VNODE_IOSYNCWANTED); + } + + /* return, with object locked! 
*/ + UVMHIST_LOG(maphist,"<- done (retval=0x%x)",retval,0,0,0); + return(retval); +} + +/* + * uvn_cluster + * + * we are about to do I/O in an object at offset. this function is called + * to establish a range of offsets around "offset" in which we can cluster + * I/O. + * + * - currently doesn't matter if obj locked or not. + */ + +static void +uvn_cluster(uobj, offset, loffset, hoffset) + struct uvm_object *uobj; + vaddr_t offset; + vaddr_t *loffset, *hoffset; /* OUT */ +{ + struct uvm_vnode *uvn = (struct uvm_vnode *) uobj; + *loffset = offset; + + if (*loffset >= uvn->u_size) + panic("uvn_cluster: offset out of range"); + + /* + * XXX: old pager claims we could use VOP_BMAP to get maxcontig value. + */ + *hoffset = *loffset + MAXBSIZE; + if (*hoffset > round_page(uvn->u_size)) /* past end? */ + *hoffset = round_page(uvn->u_size); + + return; +} + +/* + * uvn_put: flush page data to backing store. + * + * => prefer map unlocked (not required) + * => object must be locked! we will _unlock_ it before starting I/O. + * => flags: PGO_SYNCIO -- use sync. I/O + * => note: caller must set PG_CLEAN and pmap_clear_modify (if needed) + * => XXX: currently we use VOP_READ/VOP_WRITE which are only sync. + * [thus we never do async i/o! see iodone comment] + */ + +static int +uvn_put(uobj, pps, npages, flags) + struct uvm_object *uobj; + struct vm_page **pps; + int npages, flags; +{ + int retval; + + /* note: object locked */ + retval = uvn_io((struct uvm_vnode*)uobj, pps, npages, flags, UIO_WRITE); + /* note: object unlocked */ + + return(retval); +} + + +/* + * uvn_get: get pages (synchronously) from backing store + * + * => prefer map unlocked (not required) + * => object must be locked! we will _unlock_ it before starting any I/O. + * => flags: PGO_ALLPAGES: get all of the pages + * PGO_LOCKED: fault data structures are locked + * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx] + * => NOTE: caller must check for released pages!! + */ + +static int +uvn_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags) + struct uvm_object *uobj; + vaddr_t offset; + struct vm_page **pps; /* IN/OUT */ + int *npagesp; /* IN (OUT if PGO_LOCKED) */ + int centeridx, advice, flags; + vm_prot_t access_type; +{ + vaddr_t current_offset; + struct vm_page *ptmp; + int lcv, result, gotpages; + boolean_t done; + UVMHIST_FUNC("uvn_get"); UVMHIST_CALLED(maphist); + UVMHIST_LOG(maphist, "flags=%d", flags,0,0,0); + + /* + * step 1: handled the case where fault data structures are locked. + */ + + if (flags & PGO_LOCKED) { + + /* + * gotpages is the current number of pages we've gotten (which + * we pass back up to caller via *npagesp. + */ + + gotpages = 0; + + /* + * step 1a: get pages that are already resident. only do this + * if the data structures are locked (i.e. the first time + * through). + */ + + done = TRUE; /* be optimistic */ + + for (lcv = 0, current_offset = offset ; lcv < *npagesp ; + lcv++, current_offset += PAGE_SIZE) { + + /* do we care about this page? if not, skip it */ + if (pps[lcv] == PGO_DONTCARE) + continue; + + /* lookup page */ + ptmp = uvm_pagelookup(uobj, current_offset); + + /* to be useful must get a non-busy, non-released pg */ + if (ptmp == NULL || + (ptmp->flags & (PG_BUSY|PG_RELEASED)) != 0) { + if (lcv == centeridx || (flags & PGO_ALLPAGES) + != 0) + done = FALSE; /* need to do a wait or I/O! 
*/ + continue; + } + + /* + * useful page: busy/lock it and plug it in our + * result array + */ + ptmp->flags |= PG_BUSY; /* loan up to caller */ + UVM_PAGE_OWN(ptmp, "uvn_get1"); + pps[lcv] = ptmp; + gotpages++; + + } /* "for" lcv loop */ + + /* + * XXX: given the "advice", should we consider async read-ahead? + * XXX: fault current does deactive of pages behind us. is + * this good (other callers might now). + */ + /* + * XXX: read-ahead currently handled by buffer cache (bread) + * level. + * XXX: no async i/o available. + * XXX: so we don't do anything now. + */ + + /* + * step 1c: now we've either done everything needed or we to + * unlock and do some waiting or I/O. + */ + + *npagesp = gotpages; /* let caller know */ + if (done) + return(VM_PAGER_OK); /* bingo! */ + else + /* EEK! Need to unlock and I/O */ + return(VM_PAGER_UNLOCK); + } + + /* + * step 2: get non-resident or busy pages. + * object is locked. data structures are unlocked. + * + * XXX: because we can't do async I/O at this level we get things + * page at a time (otherwise we'd chunk). the VOP_READ() will do + * async-read-ahead for us at a lower level. + */ + + for (lcv = 0, current_offset = offset ; + lcv < *npagesp ; lcv++, current_offset += PAGE_SIZE) { + + /* skip over pages we've already gotten or don't want */ + /* skip over pages we don't _have_ to get */ + if (pps[lcv] != NULL || (lcv != centeridx && + (flags & PGO_ALLPAGES) == 0)) + continue; + + /* + * we have yet to locate the current page (pps[lcv]). we first + * look for a page that is already at the current offset. if + * we fine a page, we check to see if it is busy or released. + * if that is the case, then we sleep on the page until it is + * no longer busy or released and repeat the lookup. if the + * page we found is neither busy nor released, then we busy it + * (so we own it) and plug it into pps[lcv]. this breaks the + * following while loop and indicates we are ready to move on + * to the next page in the "lcv" loop above. + * + * if we exit the while loop with pps[lcv] still set to NULL, + * then it means that we allocated a new busy/fake/clean page + * ptmp in the object and we need to do I/O to fill in the data. + */ + + while (pps[lcv] == NULL) { /* top of "pps" while loop */ + + /* look for a current page */ + ptmp = uvm_pagelookup(uobj, current_offset); + + /* nope? allocate one now (if we can) */ + if (ptmp == NULL) { + + ptmp = uvm_pagealloc(uobj, current_offset, + NULL); /* alloc */ + + /* out of RAM? */ + if (ptmp == NULL) { + simple_unlock(&uobj->vmobjlock); + uvm_wait("uvn_getpage"); + simple_lock(&uobj->vmobjlock); + + /* goto top of pps while loop */ + continue; + } + + /* + * got new page ready for I/O. break pps + * while loop. pps[lcv] is still NULL. + */ + break; + } + + /* page is there, see if we need to wait on it */ + if ((ptmp->flags & (PG_BUSY|PG_RELEASED)) != 0) { + ptmp->flags |= PG_WANTED; + UVM_UNLOCK_AND_WAIT(ptmp, + &uobj->vmobjlock, 0, "uvn_get",0); + simple_lock(&uobj->vmobjlock); + continue; /* goto top of pps while loop */ + } + + /* + * if we get here then the page has become resident + * and unbusy between steps 1 and 2. we busy it + * now (so we own it) and set pps[lcv] (so that we + * exit the while loop). + */ + ptmp->flags |= PG_BUSY; + UVM_PAGE_OWN(ptmp, "uvn_get2"); + pps[lcv] = ptmp; + } + + /* + * if we own the a valid page at the correct offset, pps[lcv] + * will point to it. nothing more to do except go to the + * next page. 
+ */ + + if (pps[lcv]) + continue; /* next lcv */ + + /* + * we have a "fake/busy/clean" page that we just allocated. do + * I/O to fill it with valid data. note that object must be + * locked going into uvn_io, but will be unlocked afterwards. + */ + + result = uvn_io((struct uvm_vnode *) uobj, &ptmp, 1, + PGO_SYNCIO, UIO_READ); + + /* + * I/O done. object is unlocked (by uvn_io). because we used + * syncio the result can not be PEND or AGAIN. we must relock + * and check for errors. + */ + + /* lock object. check for errors. */ + simple_lock(&uobj->vmobjlock); + if (result != VM_PAGER_OK) { + if (ptmp->flags & PG_WANTED) + /* object lock still held */ + thread_wakeup(ptmp); + + ptmp->flags &= ~(PG_WANTED|PG_BUSY); + UVM_PAGE_OWN(ptmp, NULL); + uvm_lock_pageq(); + uvm_pagefree(ptmp); + uvm_unlock_pageq(); + simple_unlock(&uobj->vmobjlock); + return(result); + } + + /* + * we got the page! clear the fake flag (indicates valid + * data now in page) and plug into our result array. note + * that page is still busy. + * + * it is the callers job to: + * => check if the page is released + * => unbusy the page + * => activate the page + */ + + ptmp->flags &= ~PG_FAKE; /* data is valid ... */ + pmap_clear_modify(PMAP_PGARG(ptmp)); /* ... and clean */ + pps[lcv] = ptmp; + + } /* lcv loop */ + + /* + * finally, unlock object and return. + */ + + simple_unlock(&uobj->vmobjlock); + return (VM_PAGER_OK); +} + +/* + * uvn_asyncget: start async I/O to bring pages into ram + * + * => caller must lock object(???XXX: see if this is best) + * => could be called from uvn_get or a madvise() fault-ahead. + * => if it fails, it doesn't matter. + */ + +static int +uvn_asyncget(uobj, offset, npages) + struct uvm_object *uobj; + vaddr_t offset; + int npages; +{ + + /* + * XXXCDC: we can't do async I/O yet + */ + printf("uvn_asyncget called\n"); + return (KERN_SUCCESS); +} + +/* + * uvn_io: do I/O to a vnode + * + * => prefer map unlocked (not required) + * => object must be locked! we will _unlock_ it before starting I/O. + * => flags: PGO_SYNCIO -- use sync. I/O + * => XXX: currently we use VOP_READ/VOP_WRITE which are only sync. + * [thus we never do async i/o! see iodone comment] + */ + +static int +uvn_io(uvn, pps, npages, flags, rw) + struct uvm_vnode *uvn; + vm_page_t *pps; + int npages, flags, rw; +{ + struct vnode *vn; + struct uio uio; + struct iovec iov; + vaddr_t kva, file_offset; + int waitf, result, got, wanted; + UVMHIST_FUNC("uvn_io"); UVMHIST_CALLED(maphist); + + UVMHIST_LOG(maphist, "rw=%d", rw,0,0,0); + + /* + * init values + */ + + waitf = (flags & PGO_SYNCIO) ? M_WAITOK : M_NOWAIT; + vn = (struct vnode *) uvn; + file_offset = pps[0]->offset; + + /* + * check for sync'ing I/O. 
+ */ + + while (uvn->u_flags & UVM_VNODE_IOSYNC) { + if (waitf == M_NOWAIT) { + simple_unlock(&uvn->u_obj.vmobjlock); + UVMHIST_LOG(maphist,"<- try again (iosync)",0,0,0,0); + return(VM_PAGER_AGAIN); + } + uvn->u_flags |= UVM_VNODE_IOSYNCWANTED; + UVM_UNLOCK_AND_WAIT(&uvn->u_flags, &uvn->u_obj.vmobjlock, + FALSE, "uvn_iosync",0); + simple_lock(&uvn->u_obj.vmobjlock); + } + + /* + * check size + */ + + if (file_offset >= uvn->u_size) { + simple_unlock(&uvn->u_obj.vmobjlock); + UVMHIST_LOG(maphist,"<- BAD (size check)",0,0,0,0); +#ifdef DIAGNOSTIC + printf("uvn_io: note: size check fired\n"); +#endif + return(VM_PAGER_BAD); + } + + /* + * first try and map the pages in (without waiting) + */ + + kva = uvm_pagermapin(pps, npages, NULL, M_NOWAIT); + if (kva == NULL && waitf == M_NOWAIT) { + simple_unlock(&uvn->u_obj.vmobjlock); + UVMHIST_LOG(maphist,"<- mapin failed (try again)",0,0,0,0); + return(VM_PAGER_AGAIN); + } + + /* + * ok, now bump u_nio up. at this point we are done with uvn + * and can unlock it. if we still don't have a kva, try again + * (this time with sleep ok). + */ + + uvn->u_nio++; /* we have an I/O in progress! */ + simple_unlock(&uvn->u_obj.vmobjlock); + /* NOTE: object now unlocked */ + if (kva == NULL) { + kva = uvm_pagermapin(pps, npages, NULL, M_WAITOK); + } + + /* + * ok, mapped in. our pages are PG_BUSY so they are not going to + * get touched (so we can look at "offset" without having to lock + * the object). set up for I/O. + */ + + /* + * fill out uio/iov + */ + + iov.iov_base = (caddr_t) kva; + wanted = npages << PAGE_SHIFT; + if (file_offset + wanted > uvn->u_size) + wanted = uvn->u_size - file_offset; /* XXX: needed? */ + iov.iov_len = wanted; + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + uio.uio_offset = file_offset; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_rw = rw; + uio.uio_resid = wanted; + uio.uio_procp = NULL; + + /* + * do the I/O! (XXX: curproc?) + */ + + UVMHIST_LOG(maphist, "calling VOP",0,0,0,0); + + if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0) + vn_lock(vn, LK_EXCLUSIVE | LK_RETRY, curproc /*XXX*/); + /* NOTE: vnode now locked! */ + + if (rw == UIO_READ) + result = VOP_READ(vn, &uio, 0, curproc->p_ucred); + else + result = VOP_WRITE(vn, &uio, 0, curproc->p_ucred); + + if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0) + VOP_UNLOCK(vn, 0, curproc /*XXX*/); + /* NOTE: vnode now unlocked (unless vnislocked) */ + + UVMHIST_LOG(maphist, "done calling VOP",0,0,0,0); + + /* + * result == unix style errno (0 == OK!) + * + * zero out rest of buffer (if needed) + */ + + if (result == 0) { + got = wanted - uio.uio_resid; + + if (wanted && got == 0) { + result = EIO; /* XXX: error? */ + } else if (got < PAGE_SIZE * npages && rw == UIO_READ) { + bzero((void *) (kva + got), + (npages << PAGE_SHIFT) - got); + } + } + + /* + * now remove pager mapping + */ + uvm_pagermapout(kva, npages); + + /* + * now clean up the object (i.e. drop I/O count) + */ + + simple_lock(&uvn->u_obj.vmobjlock); + /* NOTE: object now locked! */ + + uvn->u_nio--; /* I/O DONE! */ + if ((uvn->u_flags & UVM_VNODE_IOSYNC) != 0 && uvn->u_nio == 0) { + wakeup(&uvn->u_nio); + } + simple_unlock(&uvn->u_obj.vmobjlock); + /* NOTE: object now unlocked! */ + + /* + * done! + */ + + UVMHIST_LOG(maphist, "<- done (result %d)", result,0,0,0); + if (result == 0) + return(VM_PAGER_OK); + else + return(VM_PAGER_ERROR); +} + +/* + * uvm_vnp_uncache: disable "persisting" in a vnode... when last reference + * is gone we will kill the object (flushing dirty pages back to the vnode + * if needed). 
+ * + * => returns TRUE if there was no uvm_object attached or if there was + * one and we killed it [i.e. if there is no active uvn] + * => called with the vnode VOP_LOCK'd [we will unlock it for I/O, if + * needed] + * + * => XXX: given that we now kill uvn's when a vnode is recycled (without + * having to hold a reference on the vnode) and given a working + * uvm_vnp_sync(), how does that effect the need for this function? + * [XXXCDC: seems like it can die?] + * + * => XXX: this function should DIE once we merge the VM and buffer + * cache. + * + * research shows that this is called in the following places: + * ext2fs_truncate, ffs_truncate, detrunc[msdosfs]: called when vnode + * changes sizes + * ext2fs_write, WRITE [ufs_readwrite], msdosfs_write: called when we + * are written to + * ex2fs_chmod, ufs_chmod: called if VTEXT vnode and the sticky bit + * is off + * ffs_realloccg: when we can't extend the current block and have + * to allocate a new one we call this [XXX: why?] + * nfsrv_rename, rename_files: called when the target filename is there + * and we want to remove it + * nfsrv_remove, sys_unlink: called on file we are removing + * nfsrv_access: if VTEXT and we want WRITE access and we don't uncache + * then return "text busy" + * nfs_open: seems to uncache any file opened with nfs + * vn_writechk: if VTEXT vnode and can't uncache return "text busy" + */ + +boolean_t +uvm_vnp_uncache(vp) + struct vnode *vp; +{ + struct uvm_vnode *uvn = &vp->v_uvm; + + /* + * lock uvn part of the vnode and check to see if we need to do anything + */ + + simple_lock(&uvn->u_obj.vmobjlock); + if ((uvn->u_flags & UVM_VNODE_VALID) == 0 || + (uvn->u_flags & UVM_VNODE_BLOCKED) != 0) { + simple_unlock(&uvn->u_obj.vmobjlock); + return(TRUE); + } + + /* + * we have a valid, non-blocked uvn. clear persist flag. + * if uvn is currently active we can return now. + */ + + uvn->u_flags &= ~UVM_VNODE_CANPERSIST; + if (uvn->u_obj.uo_refs) { + simple_unlock(&uvn->u_obj.vmobjlock); + return(FALSE); + } + + /* + * uvn is currently persisting! we have to gain a reference to + * it so that we can call uvn_detach to kill the uvn. + */ + + VREF(vp); /* seems ok, even with VOP_LOCK */ + uvn->u_obj.uo_refs++; /* value is now 1 */ + simple_unlock(&uvn->u_obj.vmobjlock); + + +#ifdef DEBUG + /* + * carry over sanity check from old vnode pager: the vnode should + * be VOP_LOCK'd, and we confirm it here. + */ + if (!VOP_ISLOCKED(vp)) { + boolean_t is_ok_anyway = FALSE; +#ifdef NFS + extern int (**nfsv2_vnodeop_p) __P((void *)); + extern int (**spec_nfsv2nodeop_p) __P((void *)); + extern int (**fifo_nfsv2nodeop_p) __P((void *)); + + /* vnode is NOT VOP_LOCKed: some vnode types _never_ lock */ + if (vp->v_op == nfsv2_vnodeop_p || + vp->v_op == spec_nfsv2nodeop_p) { + is_ok_anyway = TRUE; + } + if (vp->v_op == fifo_nfsv2nodeop_p) { + is_ok_anyway = TRUE; + } +#endif /* NFS */ + if (!is_ok_anyway) + panic("uvm_vnp_uncache: vnode not locked!"); + } +#endif /* DEBUG */ + + /* + * now drop our reference to the vnode. if we have the sole + * reference to the vnode then this will cause it to die [as we + * just cleared the persist flag]. we have to unlock the vnode + * while we are doing this as it may trigger I/O. + * + * XXX: it might be possible for uvn to get reclaimed while we are + * unlocked causing us to return TRUE when we should not. we ignore + * this as a false-positive return value doesn't hurt us. 
+ */ + VOP_UNLOCK(vp, 0, curproc /*XXX*/); + uvn_detach(&uvn->u_obj); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curproc/*XXX*/); + + /* + * and return... + */ + + return(TRUE); +} + +/* + * uvm_vnp_setsize: grow or shrink a vnode uvn + * + * grow => just update size value + * shrink => toss un-needed pages + * + * => we assume that the caller has a reference of some sort to the + * vnode in question so that it will not be yanked out from under + * us. + * + * called from: + * => truncate fns (ext2fs_truncate, ffs_truncate, detrunc[msdos]) + * => "write" fns (ext2fs_write, WRITE [ufs/ufs], msdosfs_write, nfs_write) + * => ffs_balloc [XXX: why? doesn't WRITE handle?] + * => NFS: nfs_loadattrcache, nfs_getattrcache, nfs_setattr + * => union fs: union_newsize + */ + +void +uvm_vnp_setsize(vp, newsize) + struct vnode *vp; + u_quad_t newsize; +{ + struct uvm_vnode *uvn = &vp->v_uvm; + + /* + * lock uvn and check for valid object, and if valid: do it! + */ + simple_lock(&uvn->u_obj.vmobjlock); + if (uvn->u_flags & UVM_VNODE_VALID) { + + /* + * make sure that the newsize fits within a vaddr_t + * XXX: need to revise addressing data types + */ + + if (newsize > (vaddr_t) -PAGE_SIZE) { +#ifdef DEBUG + printf("uvm_vnp_setsize: vn %p size truncated " + "%qx->%lx\n", vp, newsize, (vaddr_t)-PAGE_SIZE); +#endif + newsize = (vaddr_t)-PAGE_SIZE; + } + + /* + * now check if the size has changed: if we shrink we had better + * toss some pages... + */ + + if (uvn->u_size > newsize) { + (void)uvn_flush(&uvn->u_obj, (vaddr_t) newsize, + uvn->u_size, PGO_FREE); + } + uvn->u_size = (vaddr_t)newsize; + } + simple_unlock(&uvn->u_obj.vmobjlock); + + /* + * done + */ + return; +} + +/* + * uvm_vnp_sync: flush all dirty VM pages back to their backing vnodes. + * + * => called from sys_sync with no VM structures locked + * => only one process can do a sync at a time (because the uvn + * structure only has one queue for sync'ing). we ensure this + * by holding the uvn_sync_lock while the sync is in progress. + * other processes attempting a sync will sleep on this lock + * until we are done. + */ + +void +uvm_vnp_sync(mp) + struct mount *mp; +{ + struct uvm_vnode *uvn; + struct vnode *vp; + boolean_t got_lock; + + /* + * step 1: ensure we are only ones using the uvn_sync_q by locking + * our lock... + */ + lockmgr(&uvn_sync_lock, LK_EXCLUSIVE, (void *)0, curproc /*XXX*/); + + /* + * step 2: build up a simpleq of uvns of interest based on the + * write list. we gain a reference to uvns of interest. must + * be careful about locking uvn's since we will be holding uvn_wl_lock + * in the body of the loop. + */ + SIMPLEQ_INIT(&uvn_sync_q); + simple_lock(&uvn_wl_lock); + for (uvn = uvn_wlist.lh_first ; uvn != NULL ; + uvn = uvn->u_wlist.le_next) { + + vp = (struct vnode *) uvn; + if (mp && vp->v_mount != mp) + continue; + + /* attempt to gain reference */ + while ((got_lock = simple_lock_try(&uvn->u_obj.vmobjlock)) == + FALSE && + (uvn->u_flags & UVM_VNODE_BLOCKED) == 0) + /* spin */ ; + + /* + * we will exit the loop if either if the following are true: + * - we got the lock [always true if NCPU == 1] + * - we failed to get the lock but noticed the vnode was + * "blocked" -- in this case the vnode must be a dying + * vnode, and since dying vnodes are in the process of + * being flushed out, we can safely skip this one + * + * we want to skip over the vnode if we did not get the lock, + * or if the vnode is already dying (due to the above logic). 
+ * + * note that uvn must already be valid because we found it on + * the wlist (this also means it can't be ALOCK'd). + */ + if (!got_lock || (uvn->u_flags & UVM_VNODE_BLOCKED) != 0) { + if (got_lock) + simple_unlock(&uvn->u_obj.vmobjlock); + continue; /* skip it */ + } + + /* + * gain reference. watch out for persisting uvns (need to + * regain vnode REF). + */ + if (uvn->u_obj.uo_refs == 0) + VREF(vp); + uvn->u_obj.uo_refs++; + simple_unlock(&uvn->u_obj.vmobjlock); + + /* + * got it! + */ + SIMPLEQ_INSERT_HEAD(&uvn_sync_q, uvn, u_syncq); + } + simple_unlock(&uvn_wl_lock); + + /* + * step 3: we now have a list of uvn's that may need cleaning. + * we are holding the uvn_sync_lock, but have dropped the uvn_wl_lock + * (so we can now safely lock uvn's again). + */ + + for (uvn = uvn_sync_q.sqh_first ; uvn ; uvn = uvn->u_syncq.sqe_next) { + simple_lock(&uvn->u_obj.vmobjlock); +#ifdef DIAGNOSTIC + if (uvn->u_flags & UVM_VNODE_DYING) { + printf("uvm_vnp_sync: dying vnode on sync list\n"); + } +#endif + uvn_flush(&uvn->u_obj, 0, 0, + PGO_CLEANIT|PGO_ALLPAGES|PGO_DOACTCLUST); + + /* + * if we have the only reference and we just cleaned the uvn, + * then we can pull it out of the UVM_VNODE_WRITEABLE state + * thus allowing us to avoid thinking about flushing it again + * on later sync ops. + */ + if (uvn->u_obj.uo_refs == 1 && + (uvn->u_flags & UVM_VNODE_WRITEABLE)) { + LIST_REMOVE(uvn, u_wlist); + uvn->u_flags &= ~UVM_VNODE_WRITEABLE; + } + + simple_unlock(&uvn->u_obj.vmobjlock); + + /* now drop our reference to the uvn */ + uvn_detach(&uvn->u_obj); + } + + /* + * done! release sync lock + */ + lockmgr(&uvn_sync_lock, LK_RELEASE, (void *)0, curproc /*XXX*/); +} diff --git a/sys/uvm/uvm_vnode.h b/sys/uvm/uvm_vnode.h new file mode 100644 index 00000000000..edd4f7b698a --- /dev/null +++ b/sys/uvm/uvm_vnode.h @@ -0,0 +1,110 @@ +/* $NetBSD: uvm_vnode.h,v 1.6 1998/08/13 02:11:04 eeh Exp $ */ + +/* + * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE! + * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<< + */ +/* + * + * Copyright (c) 1997 Charles D. Cranor and Washington University. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles D. Cranor and + * Washington University. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * from: Id: uvm_vnode.h,v 1.1.2.4 1997/10/03 21:18:24 chuck Exp + */ + +#ifndef _UVM_UVM_VNODE_H_ +#define _UVM_UVM_VNODE_H_ + +/* + * uvm_vnode.h + * + * vnode handle into the VM system. + */ + +/* + * the uvm_vnode structure. put at the top of the vnode data structure. + * this allows: + * (struct vnode *) == (struct uvm_vnode *) == (struct uvm_object *) + */ + +struct uvm_vnode { + struct uvm_object u_obj; /* the actual VM object */ + int u_flags; /* flags */ + int u_nio; /* number of running I/O requests */ + vsize_t u_size; /* size of object */ + + /* the following entry is locked by uvn_wl_lock */ + LIST_ENTRY(uvm_vnode) u_wlist; /* list of writeable vnode objects */ + + /* the following entry is locked by uvn_sync_lock */ + SIMPLEQ_ENTRY(uvm_vnode) u_syncq; /* vnode objects due for a "sync" */ +}; + +/* + * u_flags values + */ +#define UVM_VNODE_VALID 0x001 /* we are attached to the vnode */ +#define UVM_VNODE_CANPERSIST 0x002 /* we can persist after ref == 0 */ +#define UVM_VNODE_ALOCK 0x004 /* uvn_attach is locked out */ +#define UVM_VNODE_DYING 0x008 /* final detach/terminate in + progress */ +#define UVM_VNODE_RELKILL 0x010 /* uvn should be killed by releasepg + when final i/o is done */ +#define UVM_VNODE_WANTED 0x020 /* someone is waiting for alock, + dying, or relkill to clear */ +#define UVM_VNODE_VNISLOCKED 0x040 /* underlying vnode struct is locked + (valid when DYING is true) */ +#define UVM_VNODE_IOSYNC 0x080 /* I/O sync in progress ... setter + sleeps on &uvn->u_nio */ +#define UVM_VNODE_IOSYNCWANTED 0x100 /* a process is waiting for the + i/o sync to clear so it can do + i/o */ +#define UVM_VNODE_WRITEABLE 0x200 /* uvn has pages that are writeable */ + +/* + * UVM_VNODE_BLOCKED: any condition that should new processes from + * touching the vnode [set WANTED and sleep to wait for it to clear] + */ +#define UVM_VNODE_BLOCKED (UVM_VNODE_ALOCK|UVM_VNODE_DYING|UVM_VNODE_RELKILL) + + +/* + * prototypes + */ + +#if 0 +/* + * moved uvn_attach to uvm_extern.h because uvm_vnode.h is needed to + * include sys/vnode.h, and files that include sys/vnode.h don't know + * what a vm_prot_t is. + */ +struct uvm_object *uvn_attach __P((void *, vm_prot_t)); +#endif + +#endif /* _UVM_UVM_VNODE_H_ */ |
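
The UVM_VNODE_BLOCKED/UVM_VNODE_WANTED handshake used by uvn_attach() and uvn_io() (check the blocked bits, note that you are waiting, sleep, re-check after wakeup) follows a classic condition-variable shape. A minimal user-space sketch of that pattern, assuming pthreads and invented names (uvn_model, model_attach, model_unblock) rather than the kernel's thread_sleep_msg/wakeup primitives:

/*
 * Illustrative model only: a thread that finds the object "blocked"
 * records that it is waiting and sleeps; whoever clears the blocked
 * state wakes the waiters, which then re-check the flags.
 */
#include <pthread.h>

#define MODEL_ALOCK   0x004                /* attach locked out */
#define MODEL_DYING   0x008                /* being torn down */
#define MODEL_WANTED  0x020                /* someone sleeps on the flags */
#define MODEL_BLOCKED (MODEL_ALOCK | MODEL_DYING)

struct uvn_model {
	pthread_mutex_t lock;              /* stands in for vmobjlock */
	pthread_cond_t  wakeup;            /* stands in for wakeup(uvn) */
	int             flags;
	int             refs;
};

/* take a reference, sleeping while the object is blocked */
void
model_attach(struct uvn_model *uvn)
{
	pthread_mutex_lock(&uvn->lock);
	while (uvn->flags & MODEL_BLOCKED) {
		uvn->flags |= MODEL_WANTED;
		/*
		 * drops the lock and sleeps atomically, like
		 * UVM_UNLOCK_AND_WAIT (and reacquires it on return,
		 * where the kernel code relocks by hand)
		 */
		pthread_cond_wait(&uvn->wakeup, &uvn->lock);
	}
	uvn->refs++;
	pthread_mutex_unlock(&uvn->lock);
}

/* clear a blocking bit and wake waiters, like the oldflags/WANTED dance */
void
model_unblock(struct uvn_model *uvn, int bit)
{
	pthread_mutex_lock(&uvn->lock);
	int oldflags = uvn->flags;
	uvn->flags &= ~(bit | MODEL_WANTED);
	if (oldflags & MODEL_WANTED)
		pthread_cond_broadcast(&uvn->wakeup);
	pthread_mutex_unlock(&uvn->lock);
}

int
main(void)
{
	static struct uvn_model uvn = {
		PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0, 0
	};

	model_attach(&uvn);                /* not blocked: just takes a ref */
	model_unblock(&uvn, MODEL_ALOCK);
	return uvn.refs == 1 ? 0 : 1;
}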
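
uvn_flush() decides between walking the object's memq list and doing per-offset hash lookups by charging each lookup a UVN_HASH_PENALTY relative to a list step. A minimal sketch of that decision, assuming a 4 KB page size and an invented helper name (choose_by_list):

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SHIFT       12   /* assumed 4 KB pages for the example */
#define UVN_HASH_PENALTY 4    /* same guess the pager uses */

/*
 * Walk the list when the object holds no more pages than the number of
 * hash lookups the range would cost after applying the penalty factor,
 * i.e. a hash lookup is assumed roughly 4x as expensive as a list step.
 */
static bool
choose_by_list(int obj_npages, unsigned long start, unsigned long stop)
{
	unsigned long range_pages = (stop - start) >> PAGE_SHIFT;

	return obj_npages <= (int)(range_pages * UVN_HASH_PENALTY);
}

int
main(void)
{
	/* flushing 16 pages of a 100-page object: 100 > 16*4, go by hash */
	printf("by_list = %d\n", choose_by_list(100, 0, 16UL << PAGE_SHIFT));
	/* flushing 64 pages of the same object: 100 <= 64*4, go by list */
	printf("by_list = %d\n", choose_by_list(100, 0, 64UL << PAGE_SHIFT));
	return 0;
}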
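
uvn_io() clamps each transfer to the vnode's size and zero-fills whatever a short read leaves in the mapped window before the pages are handed back. A small stand-alone sketch of that arithmetic (tail_to_zero() and the 4 KB page size are assumptions made for illustration, not kernel interfaces):

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096ULL     /* assumed page size for the example */

/*
 * Mirror of the uvn_io() bookkeeping: clamp the request to the vnode
 * size, then report how much of the mapped window a short read leaves
 * to be zero-filled (the kernel bzero()s that tail so stale data never
 * reaches the fault handler).
 */
static uint64_t
tail_to_zero(unsigned npages, uint64_t file_offset, uint64_t vnode_size,
    uint64_t got)
{
	uint64_t window = npages * PAGE_SIZE;
	uint64_t wanted = window;

	if (file_offset + wanted > vnode_size)
		wanted = vnode_size - file_offset;   /* clamp to EOF */
	if (got > wanted)
		got = wanted;                        /* can't read past the clamp */

	return got < window ? window - got : 0;
}

int
main(void)
{
	/* last 2-page window of a 6000-byte file; the read returns 1904 bytes */
	printf("zero-fill %llu bytes\n",
	    (unsigned long long)tail_to_zero(2, 4096, 6000, 1904));
	return 0;
}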