author      art <art@openbsd.org>    1999-02-26 01:30:10 +0000
committer   art <art@openbsd.org>    1999-02-26 01:30:10 +0000
commit      cd7ee8acd30fe8d4b178a6bcda689f469732e4bc (patch)
tree        00ca09c99c7798adde771b6c8afd33bbf1e14fc0
parent      convert to mdoc, document changes from db 1.8.6 (diff)
Import of uvm from NetBSD. Some local changes, some code disabled
-rw-r--r--  sys/uvm/uvm.h             181
-rw-r--r--  sys/uvm/uvm_amap.c       1066
-rw-r--r--  sys/uvm/uvm_amap.h        282
-rw-r--r--  sys/uvm/uvm_amap_i.h      291
-rw-r--r--  sys/uvm/uvm_anon.c        345
-rw-r--r--  sys/uvm/uvm_anon.h        105
-rw-r--r--  sys/uvm/uvm_aobj.c       1090
-rw-r--r--  sys/uvm/uvm_aobj.h         77
-rw-r--r--  sys/uvm/uvm_ddb.h          56
-rw-r--r--  sys/uvm/uvm_device.c      507
-rw-r--r--  sys/uvm/uvm_device.h       76
-rw-r--r--  sys/uvm/uvm_extern.h      386
-rw-r--r--  sys/uvm/uvm_fault.c      1747
-rw-r--r--  sys/uvm/uvm_fault.h        88
-rw-r--r--  sys/uvm/uvm_fault_i.h     203
-rw-r--r--  sys/uvm/uvm_glue.c        605
-rw-r--r--  sys/uvm/uvm_glue.h         50
-rw-r--r--  sys/uvm/uvm_init.c        167
-rw-r--r--  sys/uvm/uvm_io.c          163
-rw-r--r--  sys/uvm/uvm_km.c         1081
-rw-r--r--  sys/uvm/uvm_km.h           55
-rw-r--r--  sys/uvm/uvm_loan.c        755
-rw-r--r--  sys/uvm/uvm_loan.h         59
-rw-r--r--  sys/uvm/uvm_map.c        2972
-rw-r--r--  sys/uvm/uvm_map.h         166
-rw-r--r--  sys/uvm/uvm_map_i.h       243
-rw-r--r--  sys/uvm/uvm_meter.c       246
-rw-r--r--  sys/uvm/uvm_mmap.c        963
-rw-r--r--  sys/uvm/uvm_object.h       74
-rw-r--r--  sys/uvm/uvm_page.c       1122
-rw-r--r--  sys/uvm/uvm_page.h        132
-rw-r--r--  sys/uvm/uvm_page_i.h      292
-rw-r--r--  sys/uvm/uvm_pager.c       762
-rw-r--r--  sys/uvm/uvm_pager.h       158
-rw-r--r--  sys/uvm/uvm_pager_i.h      73
-rw-r--r--  sys/uvm/uvm_pdaemon.c    1012
-rw-r--r--  sys/uvm/uvm_pdaemon.h      86
-rw-r--r--  sys/uvm/uvm_pglist.c      292
-rw-r--r--  sys/uvm/uvm_stat.c        253
-rw-r--r--  sys/uvm/uvm_stat.h        245
-rw-r--r--  sys/uvm/uvm_swap.c       1977
-rw-r--r--  sys/uvm/uvm_swap.h         42
-rw-r--r--  sys/uvm/uvm_unix.c        258
-rw-r--r--  sys/uvm/uvm_user.c         72
-rw-r--r--  sys/uvm/uvm_vnode.c      2067
-rw-r--r--  sys/uvm/uvm_vnode.h       110
46 files changed, 23052 insertions, 0 deletions
diff --git a/sys/uvm/uvm.h b/sys/uvm/uvm.h
new file mode 100644
index 00000000000..4f4d5164527
--- /dev/null
+++ b/sys/uvm/uvm.h
@@ -0,0 +1,181 @@
+/* $NetBSD: uvm.h,v 1.13 1998/10/11 22:59:53 chuck Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+
+/*
+ *
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * from: Id: uvm.h,v 1.1.2.14 1998/02/02 20:07:19 chuck Exp
+ */
+
+#ifndef _UVM_UVM_H_
+#define _UVM_UVM_H_
+
+#include <uvm/uvm_extern.h>
+
+#include <uvm/uvm_stat.h>
+
+/*
+ * pull in prototypes
+ */
+
+#include <uvm/uvm_amap.h>
+#include <uvm/uvm_aobj.h>
+#include <uvm/uvm_fault.h>
+#include <uvm/uvm_glue.h>
+#include <uvm/uvm_km.h>
+#include <uvm/uvm_loan.h>
+#include <uvm/uvm_map.h>
+#include <uvm/uvm_object.h>
+#include <uvm/uvm_page.h>
+#include <uvm/uvm_pager.h>
+#include <uvm/uvm_pdaemon.h>
+#include <uvm/uvm_swap.h>
+
+/*
+ * pull in VM_NFREELIST
+ */
+#include <machine/vmparam.h>
+
+/*
+ * uvm structure (vm global state: collected in one structure for ease
+ * of reference...)
+ */
+
+struct uvm {
+ /* vm_page related parameters */
+ /* vm_page queues */
+ struct pglist page_free[VM_NFREELIST]; /* unallocated pages */
+ struct pglist page_active; /* allocated pages, in use */
+ struct pglist page_inactive_swp;/* pages inactive (reclaim or free) */
+ struct pglist page_inactive_obj;/* pages inactive (reclaim or free) */
+ simple_lock_data_t pageqlock; /* lock for active/inactive page q */
+ simple_lock_data_t fpageqlock; /* lock for free page q */
+ /* page daemon trigger */
+ int pagedaemon; /* daemon sleeps on this */
+ struct proc *pagedaemon_proc; /* daemon's pid */
+ simple_lock_data_t pagedaemon_lock;
+ /* page hash */
+ struct pglist *page_hash; /* page hash table (vp/off->page) */
+ int page_nhash; /* number of buckets */
+ int page_hashmask; /* hash mask */
+ simple_lock_data_t hashlock; /* lock on page_hash array */
+ /* anon stuff */
+ struct vm_anon *afree; /* anon free list */
+ simple_lock_data_t afreelock; /* lock on anon free list */
+
+ /* static kernel map entry pool */
+ vm_map_entry_t kentry_free; /* free page pool */
+ simple_lock_data_t kentry_lock;
+
+ /* aio_done is locked by uvm.pagedaemon_lock and splbio! */
+ struct uvm_aiohead aio_done; /* done async i/o reqs */
+
+ /* pager VM area bounds */
+ vaddr_t pager_sva; /* start of pager VA area */
+ vaddr_t pager_eva; /* end of pager VA area */
+
+ /* kernel object: to support anonymous pageable kernel memory */
+ struct uvm_object *kernel_object;
+};
+
+extern struct uvm uvm;
+
+/*
+ * histories
+ */
+
+UVMHIST_DECL(maphist);
+UVMHIST_DECL(pdhist);
+
+/*
+ * vm_map_entry etype bits:
+ */
+
+#define UVM_ET_OBJ 0x01 /* it is a uvm_object */
+#define UVM_ET_SUBMAP 0x02 /* it is a vm_map submap */
+#define UVM_ET_COPYONWRITE 0x04 /* copy_on_write */
+#define UVM_ET_NEEDSCOPY 0x08 /* needs_copy */
+
+#define UVM_ET_ISOBJ(E) (((E)->etype & UVM_ET_OBJ) != 0)
+#define UVM_ET_ISSUBMAP(E) (((E)->etype & UVM_ET_SUBMAP) != 0)
+#define UVM_ET_ISCOPYONWRITE(E) (((E)->etype & UVM_ET_COPYONWRITE) != 0)
+#define UVM_ET_ISNEEDSCOPY(E) (((E)->etype & UVM_ET_NEEDSCOPY) != 0)
+
+/*
+ * macros
+ */
+
+/*
+ * UVM_UNLOCK_AND_WAIT: atomic unlock+wait... front end for the
+ * (poorly named) thread_sleep_msg function.
+ */
+
+#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
+
+#define UVM_UNLOCK_AND_WAIT(event,lock,intr,msg, timo) \
+ thread_sleep_msg(event,lock,intr,msg, timo)
+
+#else
+
+#define UVM_UNLOCK_AND_WAIT(event,lock,intr,msg, timo) \
+ thread_sleep_msg(event,NULL,intr,msg, timo)
+
+#endif
+
+/*
+ * UVM_PAGE_OWN: track page ownership (only if UVM_PAGE_TRKOWN)
+ */
+
+#if defined(UVM_PAGE_TRKOWN)
+
+#define UVM_PAGE_OWN(PG, TAG) uvm_page_own(PG, TAG)
+
+#else /* UVM_PAGE_TRKOWN */
+
+#define UVM_PAGE_OWN(PG, TAG) /* nothing */
+
+#endif /* UVM_PAGE_TRKOWN */
+
+/*
+ * pull in inlines
+ */
+
+#include <uvm/uvm_amap_i.h>
+#include <uvm/uvm_fault_i.h>
+#include <uvm/uvm_map_i.h>
+#include <uvm/uvm_page_i.h>
+#include <uvm/uvm_pager_i.h>
+
+#endif /* _UVM_UVM_H_ */
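The UVM_ET_* bits and their UVM_ET_IS*() test macros defined in uvm.h above are a plain flag-word pattern. The following standalone sketch shows that pattern in isolation; the struct is a hypothetical stand-in with only an etype field, not the real vm_map_entry.

/* Minimal sketch of the UVM_ET_* flag-test pattern (stand-in struct only). */
#include <stdio.h>

#define UVM_ET_OBJ		0x01
#define UVM_ET_SUBMAP		0x02
#define UVM_ET_COPYONWRITE	0x04
#define UVM_ET_NEEDSCOPY	0x08

struct toy_entry { int etype; };	/* hypothetical stand-in */

#define UVM_ET_ISCOPYONWRITE(E)	(((E)->etype & UVM_ET_COPYONWRITE) != 0)
#define UVM_ET_ISNEEDSCOPY(E)	(((E)->etype & UVM_ET_NEEDSCOPY) != 0)

int
main(void)
{
	struct toy_entry e = { UVM_ET_COPYONWRITE | UVM_ET_NEEDSCOPY };

	printf("cow=%d needscopy=%d\n",
	    UVM_ET_ISCOPYONWRITE(&e), UVM_ET_ISNEEDSCOPY(&e));

	/* clearing needs_copy is what amap_copy() does once the copy is done */
	e.etype &= ~UVM_ET_NEEDSCOPY;
	printf("cow=%d needscopy=%d\n",
	    UVM_ET_ISCOPYONWRITE(&e), UVM_ET_ISNEEDSCOPY(&e));
	return 0;
}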
diff --git a/sys/uvm/uvm_amap.c b/sys/uvm/uvm_amap.c
new file mode 100644
index 00000000000..8685f643392
--- /dev/null
+++ b/sys/uvm/uvm_amap.c
@@ -0,0 +1,1066 @@
+/* $NetBSD: uvm_amap.c,v 1.19 1999/01/28 14:46:27 chuck Exp $ */
+
+/*
+ *
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * uvm_amap.c: amap operations
+ */
+
+/*
+ * this file contains functions that perform operations on amaps. see
+ * uvm_amap.h for a brief explanation of the role of amaps in uvm.
+ */
+
+#undef UVM_AMAP_INLINE /* enable/disable amap inlines */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/pool.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_kern.h>
+
+#define UVM_AMAP_C /* ensure disabled inlines are in */
+#include <uvm/uvm.h>
+#include <uvm/uvm_swap.h>
+
+/*
+ * pool for allocation of vm_map structures. note that the pool has
+ * its own simplelock for its protection. also note that in order to
+ * avoid an endless loop, the amap pool's allocator cannot allocate
+ * memory from an amap (it currently goes through the kernel uobj, so
+ * we are ok).
+ */
+
+struct pool uvm_amap_pool;
+
+/*
+ * local functions
+ */
+
+static struct vm_amap *amap_alloc1 __P((int, int, int));
+
+#ifdef UVM_AMAP_PPREF
+/*
+ * what is ppref? ppref is an _optional_ amap feature which is used
+ * to keep track of reference counts on a per-page basis. it is enabled
+ * when UVM_AMAP_PPREF is defined.
+ *
+ * when enabled, an array of ints is allocated for the pprefs. this
+ * array is allocated only when a partial reference is added to the
+ * map (either by unmapping part of the amap, or gaining a reference
+ * to only a part of an amap). if the malloc of the array fails
+ * (M_NOWAIT), then we set the array pointer to PPREF_NONE to indicate
+ * that we tried to do ppref's but couldn't alloc the array so just
+ * give up (after all, this is an optional feature!).
+ *
+ * the array is divided into page sized "chunks." for chunks of length 1,
+ * the chunk reference count plus one is stored in that chunk's slot.
+ * for chunks of length > 1 the first slot contains (the reference count
+ * plus one) * -1. [the negative value indicates that the length is
+ * greater than one.] the second slot of the chunk contains the length
+ * of the chunk. here is an example:
+ *
+ * actual REFS: 2 2 2 2 3 1 1 0 0 0 4 4 0 1 1 1
+ * ppref: -3 4 x x 4 -2 2 -1 3 x -5 2 1 -2 3 x
+ * <----------><-><----><-------><----><-><------->
+ * (x = don't care)
+ *
+ * this allows us to allow one int to contain the ref count for the whole
+ * chunk. note that the "plus one" part is needed because a reference
+ * count of zero is neither positive or negative (need a way to tell
+ * if we've got one zero or a bunch of them).
+ *
+ * here are some in-line functions to help us.
+ */
+
+static __inline void pp_getreflen __P((int *, int, int *, int *));
+static __inline void pp_setreflen __P((int *, int, int, int));
+
+/*
+ * pp_getreflen: get the reference and length for a specific offset
+ *
+ * => ppref's amap must be locked
+ */
+static __inline void
+pp_getreflen(ppref, offset, refp, lenp)
+ int *ppref, offset, *refp, *lenp;
+{
+
+ if (ppref[offset] > 0) { /* chunk size must be 1 */
+ *refp = ppref[offset] - 1; /* don't forget to adjust */
+ *lenp = 1;
+ } else {
+ *refp = (ppref[offset] * -1) - 1;
+ *lenp = ppref[offset+1];
+ }
+}
+
+/*
+ * pp_setreflen: set the reference and length for a specific offset
+ *
+ * => ppref's amap must be locked
+ */
+static __inline void
+pp_setreflen(ppref, offset, ref, len)
+ int *ppref, offset, ref, len;
+{
+ if (len == 1) {
+ ppref[offset] = ref + 1;
+ } else {
+ ppref[offset] = (ref + 1) * -1;
+ ppref[offset+1] = len;
+ }
+}
+#endif
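/*
 * A minimal standalone sketch (illustrative only, not part of the imported
 * file) of the ppref encoding documented above: a chunk of length 1 stores
 * (ref + 1) in its slot, while a longer chunk stores -(ref + 1) in its first
 * slot and the chunk length in its second.  The demo_* names are made up.
 */
#include <stdio.h>

static void
demo_setreflen(int *ppref, int offset, int ref, int len)
{
	if (len == 1) {
		ppref[offset] = ref + 1;
	} else {
		ppref[offset] = -(ref + 1);
		ppref[offset + 1] = len;
	}
}

static void
demo_getreflen(int *ppref, int offset, int *refp, int *lenp)
{
	if (ppref[offset] > 0) {		/* chunk of length 1 */
		*refp = ppref[offset] - 1;
		*lenp = 1;
	} else {
		*refp = -ppref[offset] - 1;
		*lenp = ppref[offset + 1];
	}
}

int
main(void)
{
	int ppref[8] = { 0 }, ref, len;

	demo_setreflen(ppref, 0, 2, 4);		/* slots 0-3 share ref count 2 */
	demo_setreflen(ppref, 4, 0, 3);		/* slots 4-6 share ref count 0 */
	demo_setreflen(ppref, 7, 5, 1);		/* slot 7 alone has ref count 5 */

	demo_getreflen(ppref, 0, &ref, &len);
	printf("slot 0: ref=%d len=%d\n", ref, len);	/* ref=2 len=4 */
	demo_getreflen(ppref, 4, &ref, &len);
	printf("slot 4: ref=%d len=%d\n", ref, len);	/* ref=0 len=3 */
	demo_getreflen(ppref, 7, &ref, &len);
	printf("slot 7: ref=%d len=%d\n", ref, len);	/* ref=5 len=1 */
	return 0;
}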
+
+/*
+ * amap_init: called at boot time to init global amap data structures
+ */
+
+void
+amap_init()
+
+{
+ /*
+ * Initialize the vm_amap pool.
+ */
+ pool_init(&uvm_amap_pool, sizeof(struct vm_amap), 0, 0, 0,
+ "amappl", 0, pool_page_alloc_nointr, pool_page_free_nointr,
+ M_UVMAMAP);
+}
+
+/*
+ * amap_alloc1: internal function that allocates an amap, but does not
+ * init the overlay.
+ *
+ * => lock on returned amap is init'd
+ */
+static inline struct vm_amap *
+amap_alloc1(slots, padslots, waitf)
+ int slots, padslots, waitf;
+{
+ struct vm_amap *amap;
+ int totalslots = slots + padslots;
+
+ amap = pool_get(&uvm_amap_pool, (waitf == M_WAITOK) ? PR_WAITOK : 0);
+ if (amap == NULL)
+ return(NULL);
+
+ simple_lock_init(&amap->am_l);
+ amap->am_ref = 1;
+ amap->am_flags = 0;
+#ifdef UVM_AMAP_PPREF
+ amap->am_ppref = NULL;
+#endif
+ amap->am_maxslot = totalslots;
+ amap->am_nslot = slots;
+ amap->am_nused = 0;
+ MALLOC(amap->am_slots, int *, totalslots * sizeof(int), M_UVMAMAP, waitf);
+ if (amap->am_slots) {
+ MALLOC(amap->am_bckptr, int *, totalslots * sizeof(int), M_UVMAMAP, waitf);
+ if (amap->am_bckptr) {
+ MALLOC(amap->am_anon, struct vm_anon **,
+ totalslots * sizeof(struct vm_anon *), M_UVMAMAP, waitf);
+ }
+ }
+
+ if (amap->am_anon)
+ return(amap);
+
+ if (amap->am_slots) {
+ FREE(amap->am_slots, M_UVMAMAP);
+ if (amap->am_bckptr)
+ FREE(amap->am_bckptr, M_UVMAMAP);
+ }
+ pool_put(&uvm_amap_pool, amap);
+ return (NULL);
+}
+
+/*
+ * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
+ *
+ * => caller should ensure sz is a multiple of PAGE_SIZE
+ * => reference count to new amap is set to one
+ * => new amap is returned unlocked
+ */
+
+struct vm_amap *
+amap_alloc(sz, padsz, waitf)
+ vaddr_t sz, padsz;
+ int waitf;
+{
+ struct vm_amap *amap;
+ int slots, padslots;
+ UVMHIST_FUNC("amap_alloc"); UVMHIST_CALLED(maphist);
+
+ AMAP_B2SLOT(slots, sz); /* load slots */
+ AMAP_B2SLOT(padslots, padsz);
+
+ amap = amap_alloc1(slots, padslots, waitf);
+ if (amap)
+ bzero(amap->am_anon, (slots + padslots) * sizeof(struct vm_anon *));
+
+ UVMHIST_LOG(maphist,"<- done, amap = 0x%x, sz=%d", amap, sz, 0, 0);
+ return(amap);
+}
+
+
+/*
+ * amap_free: free an amap
+ *
+ * => the amap must be locked (mainly for simplelock accounting)
+ * => the amap should have a zero reference count and be empty
+ */
+void
+amap_free(amap)
+ struct vm_amap *amap;
+{
+ UVMHIST_FUNC("amap_free"); UVMHIST_CALLED(maphist);
+
+#ifdef DIAGNOSTIC
+ if (amap->am_ref || amap->am_nused)
+ panic("amap_free");
+#endif
+
+ FREE(amap->am_slots, M_UVMAMAP);
+ FREE(amap->am_bckptr, M_UVMAMAP);
+ FREE(amap->am_anon, M_UVMAMAP);
+#ifdef UVM_AMAP_PPREF
+ if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
+ FREE(amap->am_ppref, M_UVMAMAP);
+#endif
+ amap_unlock(amap); /* mainly for lock debugging */
+ pool_put(&uvm_amap_pool, amap);
+
+ UVMHIST_LOG(maphist,"<- done, freed amap = 0x%x", amap, 0, 0, 0);
+}
+
+/*
+ * amap_extend: extend the size of an amap (if needed)
+ *
+ * => called from uvm_map when we want to extend an amap to cover
+ * a new mapping (rather than allocate a new one)
+ * => amap should be unlocked (we will lock it)
+ * => to safely extend an amap it should have a reference count of
+ * one (thus it can't be shared)
+ * => XXXCDC: needs a waitflag or failure return value?
+ * => XXXCDC: support padding at this level?
+ */
+void
+amap_extend(entry, addsize)
+ vm_map_entry_t entry;
+ vsize_t addsize;
+{
+ struct vm_amap *amap = entry->aref.ar_amap;
+ int slotoff = entry->aref.ar_pageoff;
+ int slotmapped, slotadd, slotneed;
+#ifdef UVM_AMAP_PPREF
+ int *newppref, *oldppref;
+#endif
+ u_int *newsl, *newbck, *oldsl, *oldbck;
+ struct vm_anon **newover, **oldover;
+ int slotadded;
+ UVMHIST_FUNC("amap_extend"); UVMHIST_CALLED(maphist);
+
+ UVMHIST_LOG(maphist, " (entry=0x%x, addsize=0x%x)", entry,addsize,0,0);
+
+ /*
+ * first, determine how many slots we need in the amap. don't
+ * forget that ar_pageoff could be non-zero: this means that
+ * there are some unused slots before us in the amap.
+ */
+
+ amap_lock(amap); /* lock! */
+
+ AMAP_B2SLOT(slotmapped, entry->end - entry->start); /* slots mapped */
+ AMAP_B2SLOT(slotadd, addsize); /* slots to add */
+ slotneed = slotoff + slotmapped + slotadd;
+
+ /*
+ * case 1: we already have enough slots in the map and thus
+ * only need to bump the reference counts on the slots we are
+ * adding.
+ */
+
+ if (amap->am_nslot >= slotneed) {
+#ifdef UVM_AMAP_PPREF
+ if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
+ amap_pp_adjref(amap, slotoff + slotmapped, addsize, 1);
+ }
+#endif
+ amap_unlock(amap);
+ UVMHIST_LOG(maphist,"<- done (case 1), amap = 0x%x, sltneed=%d",
+ amap, slotneed, 0, 0);
+ return; /* done! */
+ }
+
+ /*
+ * case 2: we pre-allocated slots for use and we just need to
+ * bump nslot up to take account for these slots.
+ */
+ if (amap->am_maxslot >= slotneed) {
+#ifdef UVM_AMAP_PPREF
+ if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
+ if ((slotoff + slotmapped) < amap->am_nslot)
+ amap_pp_adjref(amap, slotoff + slotmapped,
+ (amap->am_nslot - (slotoff + slotmapped)) <<
+ PAGE_SHIFT, 1);
+ pp_setreflen(amap->am_ppref, amap->am_nslot, 1,
+ slotneed - amap->am_nslot);
+ }
+#endif
+ amap->am_nslot = slotneed;
+ amap_unlock(amap);
+ /*
+ * no need to zero am_anon since that was done at
+ * alloc time and we never shrink an allocation.
+ */
+ UVMHIST_LOG(maphist,"<- done (case 2), amap = 0x%x, slotneed=%d",
+ amap, slotneed, 0, 0);
+ return;
+ }
+
+ /*
+ * case 3: we need to malloc a new amap and copy all the amap
+ * data over from old amap to the new one.
+ *
+ * XXXCDC: could we take advantage of a kernel realloc()?
+ */
+
+ amap_unlock(amap); /* unlock in case we sleep in malloc */
+#ifdef UVM_AMAP_PPREF
+ newppref = NULL;
+ if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
+ MALLOC(newppref, int *, slotneed * sizeof(int), M_UVMAMAP,
+ M_NOWAIT);
+ if (newppref == NULL) {
+ /* give up if malloc fails */
+ FREE(amap->am_ppref, M_UVMAMAP);
+ amap->am_ppref = PPREF_NONE;
+ }
+ }
+#endif
+ MALLOC(newsl, int *, slotneed * sizeof(int), M_UVMAMAP, M_WAITOK);
+ MALLOC(newbck, int *, slotneed * sizeof(int), M_UVMAMAP, M_WAITOK);
+ MALLOC(newover, struct vm_anon **, slotneed * sizeof(struct vm_anon *),
+ M_UVMAMAP, M_WAITOK);
+ amap_lock(amap); /* re-lock! */
+
+#ifdef DIAGNOSTIC
+ if (amap->am_maxslot >= slotneed)
+ panic("amap_extend: amap changed during malloc");
+#endif
+
+ /*
+ * now copy everything over to new malloc'd areas...
+ */
+
+ slotadded = slotneed - amap->am_nslot;
+
+ /* do am_slots */
+ oldsl = amap->am_slots;
+ bcopy(oldsl, newsl, sizeof(int) * amap->am_nused);
+ amap->am_slots = newsl;
+
+ /* do am_anon */
+ oldover = amap->am_anon;
+ bcopy(oldover, newover, sizeof(struct vm_anon *) * amap->am_nslot);
+ bzero(newover + amap->am_nslot, sizeof(struct vm_anon *) * slotadded);
+ amap->am_anon = newover;
+
+ /* do am_bckptr */
+ oldbck = amap->am_bckptr;
+ bcopy(oldbck, newbck, sizeof(int) * amap->am_nslot);
+ bzero(newbck + amap->am_nslot, sizeof(int) * slotadded); /* XXX: needed? */
+ amap->am_bckptr = newbck;
+
+#ifdef UVM_AMAP_PPREF
+ /* do ppref */
+ oldppref = amap->am_ppref;
+ if (newppref) {
+ bcopy(oldppref, newppref, sizeof(int) * amap->am_nslot);
+ bzero(newppref + amap->am_nslot, sizeof(int) * slotadded);
+ amap->am_ppref = newppref;
+ if ((slotoff + slotmapped) < amap->am_nslot)
+ amap_pp_adjref(amap, slotoff + slotmapped,
+ (amap->am_nslot - (slotoff + slotmapped)) <<
+ PAGE_SHIFT, 1);
+ pp_setreflen(newppref, amap->am_nslot, 1, slotadded);
+ }
+#endif
+
+ /* update master values */
+ amap->am_nslot = slotneed;
+ amap->am_maxslot = slotneed;
+
+ /* unlock */
+ amap_unlock(amap);
+
+ /* and free */
+ FREE(oldsl, M_UVMAMAP);
+ FREE(oldbck, M_UVMAMAP);
+ FREE(oldover, M_UVMAMAP);
+#ifdef UVM_AMAP_PPREF
+ if (oldppref && oldppref != PPREF_NONE)
+ FREE(oldppref, M_UVMAMAP);
+#endif
+ UVMHIST_LOG(maphist,"<- done (case 3), amap = 0x%x, slotneed=%d",
+ amap, slotneed, 0, 0);
+}
+
+/*
+ * amap_share_protect: change protection of anons in a shared amap
+ *
+ * for shared amaps, given the current data structure layout, it is
+ * not possible for us to directly locate all maps referencing the
+ * shared anon (to change the protection). in order to protect data
+ * in shared maps we use pmap_page_protect(). [this is useful for IPC
+ * mechanisms like map entry passing that may want to write-protect
+ * all mappings of a shared amap.] we traverse am_anon or am_slots
+ * depending on the current state of the amap.
+ *
+ * => entry's map and amap must be locked by the caller
+ */
+void
+amap_share_protect(entry, prot)
+ vm_map_entry_t entry;
+ vm_prot_t prot;
+{
+ struct vm_amap *amap = entry->aref.ar_amap;
+ int slots, lcv, slot, stop;
+
+ AMAP_B2SLOT(slots, (entry->end - entry->start));
+ stop = entry->aref.ar_pageoff + slots;
+
+ if (slots < amap->am_nused) {
+ /* cheaper to traverse am_anon */
+ for (lcv = entry->aref.ar_pageoff ; lcv < stop ; lcv++) {
+ if (amap->am_anon[lcv] == NULL)
+ continue;
+ if (amap->am_anon[lcv]->u.an_page != NULL)
+ pmap_page_protect(
+ PMAP_PGARG(amap->am_anon[lcv]->u.an_page),
+ prot);
+ }
+ return;
+ }
+
+ /* cheaper to traverse am_slots */
+ for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
+ slot = amap->am_slots[lcv];
+ if (slot < entry->aref.ar_pageoff || slot >= stop)
+ continue;
+ if (amap->am_anon[slot]->u.an_page != NULL)
+ pmap_page_protect(
+ PMAP_PGARG(amap->am_anon[slot]->u.an_page), prot);
+ }
+ return;
+}
+
+/*
+ * amap_wipeout: wipeout all anon's in an amap; then free the amap!
+ *
+ * => called from amap_unref when the final reference to an amap is
+ * discarded (i.e. when reference count == 1)
+ * => the amap should be locked (by the caller)
+ */
+
+void
+amap_wipeout(amap)
+ struct vm_amap *amap;
+{
+ int lcv, slot;
+ struct vm_anon *anon;
+ UVMHIST_FUNC("amap_wipeout"); UVMHIST_CALLED(maphist);
+ UVMHIST_LOG(maphist,"(amap=0x%x)", amap, 0,0,0);
+
+ for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
+ int refs;
+
+ slot = amap->am_slots[lcv];
+ anon = amap->am_anon[slot];
+
+ if (anon == NULL || anon->an_ref == 0)
+ panic("amap_wipeout: corrupt amap");
+
+ simple_lock(&anon->an_lock); /* lock anon */
+
+ UVMHIST_LOG(maphist," processing anon 0x%x, ref=%d", anon,
+ anon->an_ref, 0, 0);
+
+ refs = --anon->an_ref;
+ simple_unlock(&anon->an_lock);
+ if (refs == 0) {
+ /*
+ * we had the last reference to a vm_anon. free it.
+ */
+ uvm_anfree(anon);
+ }
+ }
+
+ /*
+ * now we free the map
+ */
+
+ amap->am_ref = 0; /* ... was one */
+ amap->am_nused = 0;
+ amap_free(amap); /* will unlock and free amap */
+ UVMHIST_LOG(maphist,"<- done!", 0,0,0,0);
+}
+
+/*
+ * amap_copy: ensure that a map entry's "needs_copy" flag is false
+ * by copying the amap if necessary.
+ *
+ * => an entry with a null amap pointer will get a new (blank) one.
+ * => the map that the map entry belongs to must be locked by caller.
+ * => the amap currently attached to "entry" (if any) must be unlocked.
+ * => if canchunk is true, then we may clip the entry into a chunk
+ * => "startva" and "endva" are used only if canchunk is true. they are
+ * used to limit chunking (e.g. if you have a large space that you
+ * know you are going to need to allocate amaps for, there is no point
+ * in allowing that to be chunked)
+ */
+
+void
+amap_copy(map, entry, waitf, canchunk, startva, endva)
+ vm_map_t map;
+ vm_map_entry_t entry;
+ int waitf;
+ boolean_t canchunk;
+ vaddr_t startva, endva;
+{
+ struct vm_amap *amap, *srcamap;
+ int slots, lcv;
+ vaddr_t chunksize;
+ UVMHIST_FUNC("amap_copy"); UVMHIST_CALLED(maphist);
+ UVMHIST_LOG(maphist, " (map=%p, entry=%p, waitf=%d)", map, entry, waitf, 0);
+
+ /*
+ * is there a map to copy? if not, create one from scratch.
+ */
+
+ if (entry->aref.ar_amap == NULL) {
+
+ /*
+ * check to see if we have a large amap that we can
+ * chunk. we align startva/endva to chunk-sized
+ * boundaries and then clip to them.
+ */
+
+ if (canchunk && atop(entry->end - entry->start) >=
+ UVM_AMAP_LARGE) {
+ /* convert slots to bytes */
+ chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
+ startva = (startva / chunksize) * chunksize;
+ endva = roundup(endva, chunksize);
+ UVMHIST_LOG(maphist, " chunk amap ==> clip 0x%x->0x%x"
+ "to 0x%x->0x%x", entry->start, entry->end, startva,
+ endva);
+ UVM_MAP_CLIP_START(map, entry, startva);
+ /* watch out for endva wrap-around! */
+ if (endva >= startva)
+ UVM_MAP_CLIP_END(map, entry, endva);
+ }
+
+ UVMHIST_LOG(maphist, "<- done [creating new amap 0x%x->0x%x]",
+ entry->start, entry->end, 0, 0);
+ entry->aref.ar_pageoff = 0;
+ entry->aref.ar_amap = amap_alloc(entry->end - entry->start, 0,
+ waitf);
+ if (entry->aref.ar_amap != NULL)
+ entry->etype &= ~UVM_ET_NEEDSCOPY;
+ return;
+ }
+
+ /*
+ * first check and see if we are the only map entry
+ * referencing the amap we currently have. if so, then we can
+ * just take it over rather than copying it. note that we are
+ * reading am_ref with the amap unlocked... the value can only
+ * be one if we have the only reference to the amap (via our
+ * locked map). if we are greater than one we fall through to
+ * the next case (where we double check the value).
+ */
+
+ if (entry->aref.ar_amap->am_ref == 1) {
+ entry->etype &= ~UVM_ET_NEEDSCOPY;
+ UVMHIST_LOG(maphist, "<- done [ref cnt = 1, took it over]",
+ 0, 0, 0, 0);
+ return;
+ }
+
+ /*
+ * looks like we need to copy the map.
+ */
+
+ UVMHIST_LOG(maphist," amap=%p, ref=%d, must copy it",
+ entry->aref.ar_amap, entry->aref.ar_amap->am_ref, 0, 0);
+ AMAP_B2SLOT(slots, entry->end - entry->start);
+ amap = amap_alloc1(slots, 0, waitf);
+ if (amap == NULL) {
+ UVMHIST_LOG(maphist, " amap_alloc1 failed", 0,0,0,0);
+ return;
+ }
+ srcamap = entry->aref.ar_amap;
+ amap_lock(srcamap);
+
+ /*
+ * need to double check reference count now that we've got the
+ * src amap locked down. the reference count could have
+ * changed while we were in malloc. if the reference count
+ * dropped down to one we take over the old map rather than
+ * copying the amap.
+ */
+
+ if (srcamap->am_ref == 1) { /* take it over? */
+ entry->etype &= ~UVM_ET_NEEDSCOPY;
+ amap->am_ref--; /* drop final reference to map */
+ amap_free(amap); /* dispose of new (unused) amap */
+ amap_unlock(srcamap);
+ return;
+ }
+
+ /*
+ * we must copy it now.
+ */
+
+ UVMHIST_LOG(maphist, " copying amap now",0, 0, 0, 0);
+ for (lcv = 0 ; lcv < slots; lcv++) {
+ amap->am_anon[lcv] =
+ srcamap->am_anon[entry->aref.ar_pageoff + lcv];
+ if (amap->am_anon[lcv] == NULL)
+ continue;
+ simple_lock(&amap->am_anon[lcv]->an_lock);
+ amap->am_anon[lcv]->an_ref++;
+ simple_unlock(&amap->am_anon[lcv]->an_lock);
+ amap->am_bckptr[lcv] = amap->am_nused;
+ amap->am_slots[amap->am_nused] = lcv;
+ amap->am_nused++;
+ }
+
+ /*
+ * drop our reference to the old amap (srcamap) and unlock.
+ * we know that the reference count on srcamap is greater than
+ * one (we checked above), so there is no way we could drop
+ * the count to zero. [and no need to worry about freeing it]
+ */
+
+ srcamap->am_ref--;
+ if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
+ srcamap->am_flags &= ~AMAP_SHARED; /* clear shared flag */
+#ifdef UVM_AMAP_PPREF
+ if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
+ amap_pp_adjref(srcamap, entry->aref.ar_pageoff,
+ entry->end - entry->start, -1);
+ }
+#endif
+
+ amap_unlock(srcamap);
+
+ /*
+ * install new amap.
+ */
+
+ entry->aref.ar_pageoff = 0;
+ entry->aref.ar_amap = amap;
+ entry->etype &= ~UVM_ET_NEEDSCOPY;
+
+ /*
+ * done!
+ */
+ UVMHIST_LOG(maphist, "<- done",0, 0, 0, 0);
+}
+
+/*
+ * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
+ *
+ * called during fork(2) when the parent process has a wired map
+ * entry. in that case we want to avoid write-protecting pages
+ * in the parent's map (e.g. like what you'd do for a COW page)
+ * so we resolve the COW here.
+ *
+ * => assume parent's entry was wired, thus all pages are resident.
+ * => assume pages that are loaned out (loan_count) are already mapped
+ * read-only in all maps, and thus no need for us to worry about them
+ * => assume both parent and child vm_map's are locked
+ * => caller passes child's map/entry in to us
+ * => if we run out of memory we will unlock the amap and sleep _with_ the
+ * parent and child vm_map's locked(!). we have to do this since
+ * we are in the middle of a fork(2) and we can't let the parent
+ * map change until we are done copying all the map entrys.
+ * => XXXCDC: out of memory should cause fork to fail, but there is
+ * currently no easy way to do this (needs fix)
+ * => page queues must be unlocked (we may lock them)
+ */
+
+void
+amap_cow_now(map, entry)
+ struct vm_map *map;
+ struct vm_map_entry *entry;
+{
+ struct vm_amap *amap = entry->aref.ar_amap;
+ int lcv, slot;
+ struct vm_anon *anon, *nanon;
+ struct vm_page *pg, *npg;
+
+ /*
+ * note that if we unlock the amap then we must ReStart the "lcv" for
+ * loop because some other process could reorder the anon's in the
+ * am_anon[] array on us while the lock is dropped.
+ */
+ReStart:
+ amap_lock(amap);
+
+ for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
+
+ /*
+ * get the page
+ */
+
+ slot = amap->am_slots[lcv];
+ anon = amap->am_anon[slot];
+ simple_lock(&anon->an_lock);
+ pg = anon->u.an_page;
+
+ /*
+ * page must be resident since parent is wired
+ */
+
+ if (pg == NULL)
+ panic("amap_cow_now: non-resident wired page in anon %p",
+ anon);
+
+ /*
+ * if the anon ref count is one and the page is not loaned,
+ * then we are safe (the child has exclusive access to the
+ * page). if the page is loaned, then it must already be
+ * mapped read-only.
+ *
+ * we only need to get involved when these are not true.
+ * [note: if loan_count == 0, then the anon must own the page]
+ */
+
+ if (anon->an_ref > 1 && pg->loan_count == 0) {
+
+ /*
+ * if the page is busy then we have to unlock, wait for
+ * it and then restart.
+ */
+ if (pg->flags & PG_BUSY) {
+ pg->flags |= PG_WANTED;
+ amap_unlock(amap);
+ UVM_UNLOCK_AND_WAIT(pg, &anon->an_lock, FALSE,
+ "cownow", 0);
+ goto ReStart;
+ }
+
+ /*
+ * ok, time to do a copy-on-write to a new anon
+ */
+ nanon = uvm_analloc();
+ if (nanon)
+ npg = uvm_pagealloc(NULL, 0, nanon);
+ else
+ npg = NULL; /* XXX: quiet gcc warning */
+
+ if (nanon == NULL || npg == NULL) {
+ /* out of memory */
+ /*
+ * XXXCDC: we should cause fork to fail, but
+ * we can't ...
+ */
+ if (nanon)
+ uvm_anfree(nanon);
+ simple_unlock(&anon->an_lock);
+ amap_unlock(amap);
+ uvm_wait("cownowpage");
+ goto ReStart;
+ }
+
+ /*
+ * got it... now we can copy the data and replace anon
+ * with our new one...
+ */
+ uvm_pagecopy(pg, npg); /* old -> new */
+ anon->an_ref--; /* can't drop to zero */
+ amap->am_anon[slot] = nanon; /* replace */
+
+ /*
+			 * drop PG_BUSY on new page ... since we have had its
+ * owner locked the whole time it can't be
+ * PG_RELEASED | PG_WANTED.
+ */
+ npg->flags &= ~(PG_BUSY|PG_FAKE);
+ UVM_PAGE_OWN(npg, NULL);
+ uvm_lock_pageq();
+ uvm_pageactivate(npg);
+ uvm_unlock_pageq();
+ }
+
+ simple_unlock(&anon->an_lock);
+ /*
+ * done with this anon, next ...!
+ */
+
+ } /* end of 'for' loop */
+
+ return;
+}
+
+/*
+ * amap_splitref: split a single reference into two separate references
+ *
+ * => called from uvm_map's clip routines
+ * => origref's map should be locked
+ * => origref->ar_amap should be unlocked (we will lock)
+ */
+void
+amap_splitref(origref, splitref, offset)
+ struct vm_aref *origref, *splitref;
+ vaddr_t offset;
+{
+ int leftslots;
+ UVMHIST_FUNC("amap_splitref"); UVMHIST_CALLED(maphist);
+
+ AMAP_B2SLOT(leftslots, offset);
+ if (leftslots == 0)
+ panic("amap_splitref: split at zero offset");
+
+ /*
+ * lock the amap
+ */
+ amap_lock(origref->ar_amap);
+
+ /*
+ * now: amap is locked and we have a valid am_mapped array.
+ */
+
+ if (origref->ar_amap->am_nslot - origref->ar_pageoff - leftslots <= 0)
+ panic("amap_splitref: map size check failed");
+
+#ifdef UVM_AMAP_PPREF
+ /*
+ * establish ppref before we add a duplicate reference to the amap
+ */
+ if (origref->ar_amap->am_ppref == NULL)
+ amap_pp_establish(origref->ar_amap);
+#endif
+
+ splitref->ar_amap = origref->ar_amap;
+ splitref->ar_amap->am_ref++; /* not a share reference */
+ splitref->ar_pageoff = origref->ar_pageoff + leftslots;
+
+ amap_unlock(origref->ar_amap);
+}
+
+#ifdef UVM_AMAP_PPREF
+
+/*
+ * amap_pp_establish: add a ppref array to an amap, if possible
+ *
+ * => amap locked by caller
+ */
+void
+amap_pp_establish(amap)
+ struct vm_amap *amap;
+{
+
+ MALLOC(amap->am_ppref, int *, sizeof(int) * amap->am_maxslot,
+ M_UVMAMAP, M_NOWAIT);
+
+ /*
+ * if we fail then we just won't use ppref for this amap
+ */
+ if (amap->am_ppref == NULL) {
+ amap->am_ppref = PPREF_NONE; /* not using it */
+ return;
+ }
+
+ /*
+ * init ppref
+ */
+ bzero(amap->am_ppref, sizeof(int) * amap->am_maxslot);
+ pp_setreflen(amap->am_ppref, 0, amap->am_ref, amap->am_nslot);
+ return;
+}
+
+/*
+ * amap_pp_adjref: adjust reference count to a part of an amap using the
+ * per-page reference count array.
+ *
+ * => map and amap locked by caller
+ * => caller must check that ppref != PPREF_NONE before calling
+ */
+void
+amap_pp_adjref(amap, curslot, bytelen, adjval)
+ struct vm_amap *amap;
+ int curslot;
+ vsize_t bytelen;
+ int adjval;
+{
+ int slots, stopslot, *ppref, lcv;
+ int ref, len;
+
+ /*
+ * get init values
+ */
+
+ AMAP_B2SLOT(slots, bytelen);
+ stopslot = curslot + slots;
+ ppref = amap->am_ppref;
+
+ /*
+ * first advance to the correct place in the ppref array, fragment
+ * if needed.
+ */
+
+ for (lcv = 0 ; lcv < curslot ; lcv += len) {
+ pp_getreflen(ppref, lcv, &ref, &len);
+ if (lcv + len > curslot) { /* goes past start? */
+ pp_setreflen(ppref, lcv, ref, curslot - lcv);
+ pp_setreflen(ppref, curslot, ref, len - (curslot -lcv));
+ len = curslot - lcv; /* new length of entry @ lcv */
+ }
+ }
+
+ /*
+ * now adjust reference counts in range (make sure we dont overshoot)
+ */
+
+ if (lcv != curslot)
+ panic("amap_pp_adjref: overshot target");
+
+ for (/* lcv already set */; lcv < stopslot ; lcv += len) {
+ pp_getreflen(ppref, lcv, &ref, &len);
+ if (lcv + len > stopslot) { /* goes past end? */
+ pp_setreflen(ppref, lcv, ref, stopslot - lcv);
+ pp_setreflen(ppref, stopslot, ref,
+ len - (stopslot - lcv));
+ len = stopslot - lcv;
+ }
+ ref = ref + adjval; /* ADJUST! */
+ if (ref < 0)
+ panic("amap_pp_adjref: negative reference count");
+ pp_setreflen(ppref, lcv, ref, len);
+ if (ref == 0)
+ amap_wiperange(amap, lcv, len);
+ }
+
+}
+
+/*
+ * amap_wiperange: wipe out a range of an amap
+ * [different from amap_wipeout because the amap is kept intact]
+ *
+ * => both map and amap must be locked by caller.
+ */
+void
+amap_wiperange(amap, slotoff, slots)
+ struct vm_amap *amap;
+ int slotoff, slots;
+{
+ int byanon, lcv, stop, curslot, ptr;
+ struct vm_anon *anon;
+ UVMHIST_FUNC("amap_wiperange"); UVMHIST_CALLED(maphist);
+
+ /*
+ * we can either traverse the amap by am_anon or by am_slots depending
+ * on which is cheaper. decide now.
+ */
+
+ if (slots < amap->am_nused) {
+ byanon = TRUE;
+ lcv = slotoff;
+ stop = slotoff + slots;
+ } else {
+ byanon = FALSE;
+ lcv = 0;
+ stop = amap->am_nused;
+ }
+
+ /*
+ * ok, now do it!
+ */
+
+ for (; lcv < stop; lcv++) {
+ int refs;
+
+ /*
+ * verify the anon is ok.
+ */
+ if (byanon) {
+ if (amap->am_anon[lcv] == NULL)
+ continue;
+ curslot = lcv;
+ } else {
+ curslot = amap->am_slots[lcv];
+ if (curslot < slotoff || curslot >= stop)
+ continue;
+ }
+ anon = amap->am_anon[curslot];
+
+ /*
+ * remove it from the amap
+ */
+ amap->am_anon[curslot] = NULL;
+ ptr = amap->am_bckptr[curslot];
+ if (ptr != (amap->am_nused - 1)) {
+ amap->am_slots[ptr] =
+ amap->am_slots[amap->am_nused - 1];
+ amap->am_bckptr[amap->am_slots[ptr]] =
+ ptr; /* back ptr. */
+ }
+ amap->am_nused--;
+
+ /*
+ * drop anon reference count
+ */
+ simple_lock(&anon->an_lock);
+ refs = --anon->an_ref;
+ simple_unlock(&anon->an_lock);
+ if (refs == 0) {
+ /*
+ * we just eliminated the last reference to an anon.
+ * free it.
+ */
+ uvm_anfree(anon);
+ }
+ }
+}
+
+#endif
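amap_copy() in the file above chunks large, not-yet-referenced amaps by aligning startva down and endva up to UVM_AMAP_CHUNK-sized boundaries before clipping the entry. The standalone sketch below shows only that alignment arithmetic; the 4 KB page size and the example addresses are assumptions made for illustration.

#include <stdio.h>

#define PAGE_SHIFT	12		/* assume 4 KB pages for the demo */
#define UVM_AMAP_CHUNK	16		/* slots per chunk, as in uvm_amap.h */

int
main(void)
{
	unsigned long chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;	/* 64 KB */
	unsigned long startva = 0x12345000UL, endva = 0x12363000UL;

	/* same arithmetic as amap_copy(): round the start down, the end up */
	unsigned long clipstart = (startva / chunksize) * chunksize;
	unsigned long clipend = ((endva + chunksize - 1) / chunksize) * chunksize;

	printf("0x%lx-0x%lx clipped to 0x%lx-0x%lx\n",
	    startva, endva, clipstart, clipend);
	return 0;
}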
diff --git a/sys/uvm/uvm_amap.h b/sys/uvm/uvm_amap.h
new file mode 100644
index 00000000000..8783790017f
--- /dev/null
+++ b/sys/uvm/uvm_amap.h
@@ -0,0 +1,282 @@
+/* $NetBSD: uvm_amap.h,v 1.10 1999/01/28 14:46:27 chuck Exp $ */
+
+/*
+ *
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _UVM_UVM_AMAP_H_
+#define _UVM_UVM_AMAP_H_
+
+/*
+ * uvm_amap.h: general amap interface and amap implementation-specific info
+ */
+
+/*
+ * an amap structure contains pointers to a set of anons that are
+ * mapped together in virtual memory (an anon is a single page of
+ * anonymous virtual memory -- see uvm_anon.h). in uvm we hide the
+ * details of the implementation of amaps behind a general amap
+ * interface. this allows us to change the amap implementation
+ * without having to touch the rest of the code. this file is divided
+ * into two parts: the definition of the uvm amap interface and the
+ * amap implementation-specific definitions.
+ */
+
+/*
+ * part 1: amap interface
+ */
+
+/*
+ * forward definition of vm_amap structure. only amap
+ * implementation-specific code should directly access the fields of
+ * this structure.
+ */
+
+struct vm_amap;
+
+/*
+ * handle inline options... we allow amap ops to be inline, but we also
+ * provide a hook to turn this off. macros can also be used.
+ */
+
+#ifdef UVM_AMAP_INLINE /* defined/undef'd in uvm_amap.c */
+#define AMAP_INLINE static __inline /* inline enabled */
+#else
+#define AMAP_INLINE /* inline disabled */
+#endif /* UVM_AMAP_INLINE */
+
+
+/*
+ * prototypes for the amap interface
+ */
+
+AMAP_INLINE
+vaddr_t amap_add /* add an anon to an amap */
+ __P((struct vm_aref *, vaddr_t,
+ struct vm_anon *, int));
+struct vm_amap *amap_alloc /* allocate a new amap */
+ __P((vaddr_t, vaddr_t, int));
+void amap_copy /* clear amap needs-copy flag */
+ __P((vm_map_t, vm_map_entry_t, int,
+ boolean_t, vaddr_t, vaddr_t));
+void amap_cow_now /* resolve all COW faults now */
+ __P((vm_map_t, vm_map_entry_t));
+void amap_extend /* make amap larger */
+ __P((vm_map_entry_t, vsize_t));
+int amap_flags /* get amap's flags */
+ __P((struct vm_amap *));
+void amap_free /* free amap */
+ __P((struct vm_amap *));
+void amap_init /* init amap module (at boot time) */
+ __P((void));
+void amap_lock /* lock amap */
+ __P((struct vm_amap *));
+AMAP_INLINE
+struct vm_anon *amap_lookup /* lookup an anon @ offset in amap */
+ __P((struct vm_aref *, vaddr_t));
+AMAP_INLINE
+void amap_lookups /* lookup multiple anons */
+ __P((struct vm_aref *, vaddr_t,
+ struct vm_anon **, int));
+AMAP_INLINE
+void amap_ref /* add a reference to an amap */
+ __P((vm_map_entry_t, int));
+int amap_refs /* get number of references of amap */
+ __P((struct vm_amap *));
+void amap_share_protect /* protect pages in a shared amap */
+ __P((vm_map_entry_t, vm_prot_t));
+void amap_splitref /* split reference to amap into two */
+ __P((struct vm_aref *, struct vm_aref *,
+ vaddr_t));
+AMAP_INLINE
+void amap_unadd /* remove an anon from an amap */
+ __P((struct vm_amap *, vaddr_t));
+void amap_unlock /* unlock amap */
+ __P((struct vm_amap *));
+AMAP_INLINE
+void amap_unref /* drop reference to an amap */
+ __P((vm_map_entry_t, int));
+void amap_wipeout /* remove all anons from amap */
+ __P((struct vm_amap *));
+
+/*
+ * amap flag values
+ */
+
+#define AMAP_SHARED 0x1 /* amap is shared */
+#define AMAP_REFALL 0x2 /* amap_ref: reference entire amap */
+
+
+/**********************************************************************/
+
+/*
+ * part 2: amap implementation-specific info
+ */
+
+/*
+ * we currently provide an array-based amap implementation. in this
+ * implementation we provide the option of tracking split references
+ * so that we don't lose track of references during partial unmaps
+ * ... this is enabled with the "UVM_AMAP_PPREF" define.
+ */
+
+#define UVM_AMAP_PPREF /* track partial references */
+
+/*
+ * here is the definition of the vm_amap structure for this implementation.
+ */
+
+struct vm_amap {
+ simple_lock_data_t am_l; /* simple lock [locks all vm_amap fields] */
+ int am_ref; /* reference count */
+ int am_flags; /* flags */
+ int am_maxslot; /* max # of slots allocated */
+ int am_nslot; /* # of slots currently in map ( <= maxslot) */
+ int am_nused; /* # of slots currently in use */
+ int *am_slots; /* contig array of active slots */
+ int *am_bckptr; /* back pointer array to am_slots */
+ struct vm_anon **am_anon; /* array of anonymous pages */
+#ifdef UVM_AMAP_PPREF
+ int *am_ppref; /* per page reference count (if !NULL) */
+#endif
+};
+
+/*
+ * note that am_slots, am_bckptr, and am_anon are arrays. this allows
+ * fast lookup of pages based on their virtual address at the expense of
+ * some extra memory. in the future we should be smarter about memory
+ * usage and fall back to a non-array based implementation on systems
+ * that are short of memory (XXXCDC).
+ *
+ * the entries in the array are called slots... for example an amap that
+ * covers four pages of virtual memory is said to have four slots. here
+ * is an example of the array usage for a four slot amap. note that only
+ * slots one and three have anons assigned to them. "D/C" means that we
+ * "don't care" about the value.
+ *
+ * 0 1 2 3
+ * am_anon: NULL, anon0, NULL, anon1 (actual pointers to anons)
+ * am_bckptr: D/C, 1, D/C, 0 (points to am_slots entry)
+ *
+ * am_slots: 3, 1, D/C, D/C (says slots 3 and 1 are in use)
+ *
+ * note that am_bckptr is D/C if the slot in am_anon is set to NULL.
+ * to find the entry in am_slots for an anon, look at am_bckptr[slot],
+ * thus the entry for slot 3 in am_slots[] is at am_slots[am_bckptr[3]].
+ * in general, if am_anon[X] is non-NULL, then the following must be
+ * true: am_slots[am_bckptr[X]] == X
+ *
+ * note that am_slots is always contig-packed.
+ */
+
+/*
+ * defines for handling of large sparse amaps:
+ *
+ * one of the problems of array-based amaps is that if you allocate a
+ * large sparsely-used area of virtual memory you end up allocating
+ * large arrays that, for the most part, don't get used. this is a
+ * problem for BSD in that the kernel likes to make these types of
+ * allocations to "reserve" memory for possible future use.
+ *
+ * for example, the kernel allocates (reserves) a large chunk of user
+ * VM for possible stack growth. most of the time only a page or two
+ * of this VM is actually used. since the stack is anonymous memory
+ * it makes sense for it to live in an amap, but if we allocated an
+ * amap for the entire stack range we could end up wasting a large
+ * amount of malloc'd KVM.
+ *
+ * for example, on the i386 at boot time we allocate two amaps for the stack
+ * of /sbin/init:
+ * 1. a 7680 slot amap at protection 0 (reserve space for stack)
+ * 2. a 512 slot amap at protection 7 (top of stack)
+ *
+ * most of the array allocated for the amaps for this is never used.
+ * the amap interface provides a way for us to avoid this problem by
+ * allowing amap_copy() to break larger amaps up into smaller sized
+ * chunks (controlled by the "canchunk" option). we use this feature
+ * to reduce our memory usage with the BSD stack management. if we
+ * are asked to create an amap with more than UVM_AMAP_LARGE slots in it,
+ * we attempt to break it up into a UVM_AMAP_CHUNK sized amap if the
+ * "canchunk" flag is set.
+ *
+ * so, in the i386 example, the 7680 slot area is never referenced so
+ * nothing gets allocated (amap_copy is never called because the protection
+ * is zero). the 512 slot area for the top of the stack is referenced.
+ * the chunking code breaks it up into 16 slot chunks (hopefully a single
+ * 16 slot chunk is enough to handle the whole stack).
+ */
+
+#define UVM_AMAP_LARGE 256 /* # of slots in "large" amap */
+#define UVM_AMAP_CHUNK 16 /* # of slots to chunk large amaps in */
+
+
+/*
+ * macros
+ */
+
+/* AMAP_B2SLOT: convert byte offset to slot */
+#ifdef DIAGNOSTIC
+#define AMAP_B2SLOT(S,B) { \
+ if ((B) & (PAGE_SIZE - 1)) \
+ panic("AMAP_B2SLOT: invalid byte count"); \
+ (S) = (B) >> PAGE_SHIFT; \
+}
+#else
+#define AMAP_B2SLOT(S,B) (S) = (B) >> PAGE_SHIFT
+#endif
+
+/*
+ * lock/unlock/refs/flags macros
+ */
+
+#define amap_flags(AMAP) ((AMAP)->am_flags)
+#define amap_lock(AMAP) simple_lock(&(AMAP)->am_l)
+#define amap_refs(AMAP) ((AMAP)->am_ref)
+#define amap_unlock(AMAP) simple_unlock(&(AMAP)->am_l)
+
+/*
+ * if we enable PPREF, then we have a couple of extra functions that
+ * we need to prototype here...
+ */
+
+#ifdef UVM_AMAP_PPREF
+
+#define PPREF_NONE ((int *) -1) /* not using ppref */
+
+void amap_pp_adjref /* adjust references */
+ __P((struct vm_amap *, int, vsize_t, int));
+void amap_pp_establish /* establish ppref */
+ __P((struct vm_amap *));
+void amap_wiperange /* wipe part of an amap */
+ __P((struct vm_amap *, int, int));
+#endif /* UVM_AMAP_PPREF */
+
+#endif /* _UVM_UVM_AMAP_H_ */
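The layout comment in uvm_amap.h above states the invariant that am_slots[am_bckptr[X]] == X whenever am_anon[X] is non-NULL, and walks through a four-slot example with anons in slots 1 and 3. The standalone sketch below checks that invariant over those example values; the strings merely stand in for anon pointers.

#include <stdio.h>

#define NSLOT 4

int
main(void)
{
	/* the four-slot example from the comment: anons in slots 1 and 3 */
	const char *am_anon[NSLOT] = { NULL, "anon0", NULL, "anon1" };
	int am_bckptr[NSLOT] = { -1, 1, -1, 0 };	/* -1 == don't care */
	int am_slots[NSLOT] = { 3, 1, -1, -1 };
	int am_nused = 2, x;

	for (x = 0; x < NSLOT; x++) {
		if (am_anon[x] == NULL)
			continue;
		if (am_bckptr[x] < 0 || am_bckptr[x] >= am_nused ||
		    am_slots[am_bckptr[x]] != x)
			printf("slot %d: invariant violated\n", x);
		else
			printf("slot %d holds %s via am_slots[%d]\n",
			    x, am_anon[x], am_bckptr[x]);
	}
	return 0;
}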
diff --git a/sys/uvm/uvm_amap_i.h b/sys/uvm/uvm_amap_i.h
new file mode 100644
index 00000000000..d5bbe11c054
--- /dev/null
+++ b/sys/uvm/uvm_amap_i.h
@@ -0,0 +1,291 @@
+/* $NetBSD: uvm_amap_i.h,v 1.11 1999/01/28 14:46:27 chuck Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ *
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * from: Id: uvm_amap_i.h,v 1.1.2.4 1998/01/05 18:12:57 chuck Exp
+ */
+
+#ifndef _UVM_UVM_AMAP_I_H_
+#define _UVM_UVM_AMAP_I_H_
+
+/*
+ * uvm_amap_i.h
+ */
+
+/*
+ * if inlines are enabled always pull in these functions, otherwise
+ * pull them in only once (when we are compiling uvm_amap.c).
+ */
+
+#if defined(UVM_AMAP_INLINE) || defined(UVM_AMAP_C)
+
+/*
+ * amap_lookup: look up a page in an amap
+ *
+ * => amap should be locked by caller.
+ */
+AMAP_INLINE struct vm_anon *
+amap_lookup(aref, offset)
+ struct vm_aref *aref;
+ vaddr_t offset;
+{
+ int slot;
+ struct vm_amap *amap = aref->ar_amap;
+ UVMHIST_FUNC("amap_lookup"); UVMHIST_CALLED(maphist);
+
+ AMAP_B2SLOT(slot, offset);
+ slot += aref->ar_pageoff;
+
+ if (slot >= amap->am_nslot)
+ panic("amap_lookup: offset out of range");
+
+ UVMHIST_LOG(maphist, "<- done (amap=0x%x, offset=0x%x, result=0x%x)",
+ amap, offset, amap->am_anon[slot], 0);
+ return(amap->am_anon[slot]);
+}
+
+/*
+ * amap_lookups: look up a range of pages in an amap
+ *
+ * => amap should be locked by caller.
+ * => XXXCDC: this interface is biased toward array-based amaps. fix.
+ */
+AMAP_INLINE void
+amap_lookups(aref, offset, anons, npages)
+ struct vm_aref *aref;
+ vaddr_t offset;
+ struct vm_anon **anons;
+ int npages;
+{
+ int slot;
+ struct vm_amap *amap = aref->ar_amap;
+ UVMHIST_FUNC("amap_lookups"); UVMHIST_CALLED(maphist);
+
+ AMAP_B2SLOT(slot, offset);
+ slot += aref->ar_pageoff;
+
+ UVMHIST_LOG(maphist, " slot=%d, npages=%d, nslot=%d", slot, npages,
+ amap->am_nslot, 0);
+
+ if ((slot + (npages - 1)) >= amap->am_nslot)
+ panic("amap_lookups: offset out of range");
+
+ bcopy(&amap->am_anon[slot], anons, npages * sizeof(struct vm_anon *));
+
+ UVMHIST_LOG(maphist, "<- done", 0, 0, 0, 0);
+ return;
+}
+
+/*
+ * amap_add: add (or replace) a page to an amap
+ *
+ * => caller must lock amap.
+ * => if (replace) caller must lock anon because we might have to call
+ * pmap_page_protect on the anon's page.
+ * => returns an "offset" which is meaningful to amap_unadd().
+ */
+AMAP_INLINE vaddr_t
+amap_add(aref, offset, anon, replace)
+ struct vm_aref *aref;
+ vaddr_t offset;
+ struct vm_anon *anon;
+ int replace;
+{
+ int slot;
+ struct vm_amap *amap = aref->ar_amap;
+ UVMHIST_FUNC("amap_add"); UVMHIST_CALLED(maphist);
+
+ AMAP_B2SLOT(slot, offset);
+ slot += aref->ar_pageoff;
+
+ if (slot >= amap->am_nslot)
+ panic("amap_add: offset out of range");
+
+ if (replace) {
+
+ if (amap->am_anon[slot] == NULL)
+ panic("amap_add: replacing null anon");
+ if (amap->am_anon[slot]->u.an_page != NULL &&
+ (amap->am_flags & AMAP_SHARED) != 0) {
+ pmap_page_protect(
+ PMAP_PGARG(amap->am_anon[slot]->u.an_page),
+ VM_PROT_NONE);
+ /*
+ * XXX: suppose page is supposed to be wired somewhere?
+ */
+ }
+ } else { /* !replace */
+ if (amap->am_anon[slot] != NULL)
+ panic("amap_add: slot in use");
+
+ amap->am_bckptr[slot] = amap->am_nused;
+ amap->am_slots[amap->am_nused] = slot;
+ amap->am_nused++;
+ }
+ amap->am_anon[slot] = anon;
+ UVMHIST_LOG(maphist,
+ "<- done (amap=0x%x, offset=0x%x, anon=0x%x, rep=%d)",
+ amap, offset, anon, replace);
+
+ return(slot);
+}
+
+/*
+ * amap_unadd: remove a page from an amap, given we know the slot #.
+ *
+ * => caller must lock amap
+ */
+AMAP_INLINE void
+amap_unadd(amap, slot)
+ struct vm_amap *amap;
+ vaddr_t slot;
+{
+ int ptr;
+ UVMHIST_FUNC("amap_unadd"); UVMHIST_CALLED(maphist);
+
+ if (slot >= amap->am_nslot)
+		panic("amap_unadd: offset out of range");
+
+ if (amap->am_anon[slot] == NULL)
+ panic("amap_unadd: nothing there");
+
+ amap->am_anon[slot] = NULL;
+ ptr = amap->am_bckptr[slot];
+
+ if (ptr != (amap->am_nused - 1)) { /* swap to keep slots contig? */
+ amap->am_slots[ptr] = amap->am_slots[amap->am_nused - 1];
+ amap->am_bckptr[amap->am_slots[ptr]] = ptr; /* back link */
+ }
+ amap->am_nused--;
+ UVMHIST_LOG(maphist, "<- done (amap=0x%x, slot=0x%x)", amap, slot,0, 0);
+}
+
+/*
+ * amap_ref: gain a reference to an amap
+ *
+ * => amap must not be locked (we will lock)
+ * => called at fork time to gain the child's reference
+ */
+AMAP_INLINE void
+amap_ref(entry, flags)
+ vm_map_entry_t entry;
+ int flags;
+{
+ struct vm_amap *amap = entry->aref.ar_amap;
+ UVMHIST_FUNC("amap_ref"); UVMHIST_CALLED(maphist);
+
+ amap_lock(amap);
+ amap->am_ref++;
+ if (flags & AMAP_SHARED)
+ amap->am_flags |= AMAP_SHARED;
+#ifdef UVM_AMAP_PPREF
+ if (amap->am_ppref == NULL && (flags & AMAP_REFALL) == 0 &&
+ (entry->start - entry->end) >> PAGE_SHIFT != amap->am_nslot)
+ amap_pp_establish(amap);
+ if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
+ if (flags & AMAP_REFALL)
+ amap_pp_adjref(amap, 0, amap->am_nslot << PAGE_SHIFT, 1);
+ else
+ amap_pp_adjref(amap, entry->aref.ar_pageoff,
+ entry->end - entry->start, 1);
+ }
+#endif
+ amap_unlock(amap);
+ UVMHIST_LOG(maphist,"<- done! amap=0x%x", amap, 0, 0, 0);
+}
+
+/*
+ * amap_unref: remove a reference to an amap
+ *
+ * => caller must remove all pmap-level references to this amap before
+ * dropping the reference
+ * => called from uvm_unmap_detach [only] ... note that entry is no
+ * longer part of a map and thus has no need for locking
+ * => amap must be unlocked (we will lock it).
+ */
+AMAP_INLINE void
+amap_unref(entry, all)
+ vm_map_entry_t entry;
+ int all;
+{
+ struct vm_amap *amap = entry->aref.ar_amap;
+ UVMHIST_FUNC("amap_unref"); UVMHIST_CALLED(maphist);
+
+ /*
+ * lock it
+ */
+ amap_lock(amap);
+
+ UVMHIST_LOG(maphist,"(entry=0x%x) amap=0x%x refs=%d, nused=%d",
+ entry, amap, amap->am_ref, amap->am_nused);
+
+ /*
+ * if we are the last reference, free the amap and return.
+ */
+
+ if (amap->am_ref == 1) {
+ amap_wipeout(amap); /* drops final ref and frees */
+ UVMHIST_LOG(maphist,"<- done (was last ref)!", 0, 0, 0, 0);
+ return; /* no need to unlock */
+ }
+
+ /*
+ * otherwise just drop the reference count(s)
+ */
+
+ amap->am_ref--;
+ if (amap->am_ref == 1 && (amap->am_flags & AMAP_SHARED) != 0)
+ amap->am_flags &= ~AMAP_SHARED; /* clear shared flag */
+#ifdef UVM_AMAP_PPREF
+ if (amap->am_ppref == NULL && all == 0 &&
+ (entry->start - entry->end) >> PAGE_SHIFT != amap->am_nslot)
+ amap_pp_establish(amap);
+ if (amap->am_ppref && amap->am_ppref != PPREF_NONE) {
+ if (all)
+ amap_pp_adjref(amap, 0, amap->am_nslot << PAGE_SHIFT, -1);
+ else
+ amap_pp_adjref(amap, entry->aref.ar_pageoff,
+ entry->end - entry->start, -1);
+ }
+#endif
+ amap_unlock(amap);
+
+ UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
+}
+
+#endif /* defined(UVM_AMAP_INLINE) || defined(UVM_AMAP_C) */
+
+#endif /* _UVM_UVM_AMAP_I_H_ */
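amap_add() and amap_unadd() above keep am_slots densely packed: an add appends the slot number to am_slots and records its position in am_bckptr, and a remove swaps the last active entry into the vacated position and fixes its back pointer. Below is a standalone sketch of that bookkeeping on plain arrays (toy sizes, no locking, names are illustrative).

#include <stdio.h>

#define NSLOT 8

static int am_slots[NSLOT], am_bckptr[NSLOT], am_nused;

static void
slot_add(int slot)
{
	am_bckptr[slot] = am_nused;	/* where this slot sits in am_slots */
	am_slots[am_nused++] = slot;
}

static void
slot_remove(int slot)
{
	int ptr = am_bckptr[slot];

	if (ptr != am_nused - 1) {	/* swap the last active entry into the hole */
		am_slots[ptr] = am_slots[am_nused - 1];
		am_bckptr[am_slots[ptr]] = ptr;
	}
	am_nused--;
}

int
main(void)
{
	int i;

	slot_add(5);
	slot_add(2);
	slot_add(7);
	slot_remove(5);			/* slot 7 gets swapped into position 0 */

	for (i = 0; i < am_nused; i++)
		printf("am_slots[%d] = %d\n", i, am_slots[i]);
	return 0;
}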
diff --git a/sys/uvm/uvm_anon.c b/sys/uvm/uvm_anon.c
new file mode 100644
index 00000000000..214e12df701
--- /dev/null
+++ b/sys/uvm/uvm_anon.c
@@ -0,0 +1,345 @@
+/* $NetBSD: uvm_anon.c,v 1.1 1999/01/24 23:53:15 chuck Exp $ */
+
+/*
+ *
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * uvm_anon.c: uvm anon ops
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/pool.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_kern.h>
+
+#include <uvm/uvm.h>
+#include <uvm/uvm_swap.h>
+
+/*
+ * allocate anons
+ */
+void
+uvm_anon_init()
+{
+ struct vm_anon *anon;
+ int nanon = uvmexp.free - (uvmexp.free / 16); /* XXXCDC ??? */
+ int lcv;
+
+ /*
+ * Allocate the initial anons.
+ */
+ anon = (struct vm_anon *)uvm_km_alloc(kernel_map,
+ sizeof(*anon) * nanon);
+ if (anon == NULL) {
+ printf("uvm_anon_init: can not allocate %d anons\n", nanon);
+ panic("uvm_anon_init");
+ }
+
+ bzero(anon, sizeof(*anon) * nanon);
+ uvm.afree = NULL;
+ uvmexp.nanon = uvmexp.nfreeanon = nanon;
+ for (lcv = 0 ; lcv < nanon ; lcv++) {
+ anon[lcv].u.an_nxt = uvm.afree;
+ uvm.afree = &anon[lcv];
+ }
+ simple_lock_init(&uvm.afreelock);
+}
+
+/*
+ * add some more anons to the free pool. called when we add
+ * more swap space.
+ */
+void
+uvm_anon_add(pages)
+ int pages;
+{
+ struct vm_anon *anon;
+ int lcv;
+
+ anon = (struct vm_anon *)uvm_km_alloc(kernel_map,
+ sizeof(*anon) * pages);
+
+ /* XXX Should wait for VM to free up. */
+ if (anon == NULL) {
+ printf("uvm_anon_add: can not allocate %d anons\n", pages);
+ panic("uvm_anon_add");
+ }
+
+ simple_lock(&uvm.afreelock);
+ bzero(anon, sizeof(*anon) * pages);
+ uvmexp.nanon += pages;
+ uvmexp.nfreeanon += pages;
+ for (lcv = 0; lcv < pages; lcv++) {
+		simple_lock_init(&anon[lcv].an_lock);
+ anon[lcv].u.an_nxt = uvm.afree;
+ uvm.afree = &anon[lcv];
+ }
+ simple_unlock(&uvm.afreelock);
+}
+
+/*
+ * allocate an anon
+ */
+struct vm_anon *
+uvm_analloc()
+{
+ struct vm_anon *a;
+
+ simple_lock(&uvm.afreelock);
+ a = uvm.afree;
+ if (a) {
+ uvm.afree = a->u.an_nxt;
+ uvmexp.nfreeanon--;
+ a->an_ref = 1;
+ a->an_swslot = 0;
+ a->u.an_page = NULL; /* so we can free quickly */
+ }
+ simple_unlock(&uvm.afreelock);
+ return(a);
+}
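+
+/*
+ * usage note: uvm_analloc() returns NULL when the free list is empty,
+ * so callers must check for failure.  an anon obtained here starts
+ * with an_ref == 1 and goes back on the free list via uvm_anfree()
+ * once its reference count has dropped to zero.
+ */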
+
+/*
+ * uvm_anfree: free a single anon structure
+ *
+ * => caller must remove anon from its amap before calling (if it was in
+ * an amap).
+ * => anon must be unlocked and have a zero reference count.
+ * => we may lock the pageq's.
+ */
+void
+uvm_anfree(anon)
+ struct vm_anon *anon;
+{
+ struct vm_page *pg;
+ UVMHIST_FUNC("uvm_anfree"); UVMHIST_CALLED(maphist);
+ UVMHIST_LOG(maphist,"(anon=0x%x)", anon, 0,0,0);
+
+ /*
+ * get page
+ */
+
+ pg = anon->u.an_page;
+
+ /*
+ * if there is a resident page and it is loaned, then anon may not
+	 * own it.   call out to uvm_anon_lockloanpg() to ensure the real owner
+ * of the page has been identified and locked.
+ */
+
+ if (pg && pg->loan_count)
+ pg = uvm_anon_lockloanpg(anon);
+
+ /*
+ * if we have a resident page, we must dispose of it before freeing
+ * the anon.
+ */
+
+ if (pg) {
+
+ /*
+ * if the page is owned by a uobject (now locked), then we must
+ * kill the loan on the page rather than free it.
+ */
+
+ if (pg->uobject) {
+
+ /* kill loan */
+ uvm_lock_pageq();
+#ifdef DIAGNOSTIC
+ if (pg->loan_count < 1)
+ panic("uvm_anfree: obj owned page "
+ "with no loan count");
+#endif
+ pg->loan_count--;
+ pg->uanon = NULL;
+ uvm_unlock_pageq();
+ simple_unlock(&pg->uobject->vmobjlock);
+
+ } else {
+
+ /*
+ * page has no uobject, so we must be the owner of it.
+ *
+ * if page is busy then we just mark it as released
+			 * (whoever has it busy must check for this when they
+ * wake up). if the page is not busy then we can
+ * free it now.
+ */
+
+ if ((pg->flags & PG_BUSY) != 0) {
+ /* tell them to dump it when done */
+ pg->flags |= PG_RELEASED;
+ simple_unlock(&anon->an_lock);
+ UVMHIST_LOG(maphist,
+ " anon 0x%x, page 0x%x: BUSY (released!)",
+ anon, pg, 0, 0);
+ return;
+ }
+
+ pmap_page_protect(PMAP_PGARG(pg), VM_PROT_NONE);
+ uvm_lock_pageq(); /* lock out pagedaemon */
+ uvm_pagefree(pg); /* bye bye */
+ uvm_unlock_pageq(); /* free the daemon */
+
+ UVMHIST_LOG(maphist," anon 0x%x, page 0x%x: freed now!",
+ anon, pg, 0, 0);
+ }
+ }
+
+ /*
+ * are we using any backing store resources? if so, free them.
+ */
+ if (anon->an_swslot) {
+ /*
+ * on backing store: no I/O in progress. sole amap reference
+ * is ours and we've got it locked down. thus we can free,
+ * and be done.
+ */
+ UVMHIST_LOG(maphist," freeing anon 0x%x, paged to swslot 0x%x",
+ anon, anon->an_swslot, 0, 0);
+ uvm_swap_free(anon->an_swslot, 1);
+ anon->an_swslot = 0;
+ }
+
+ /*
+ * now that we've stripped the data areas from the anon, free the anon
+ * itself!
+ */
+ simple_lock(&uvm.afreelock);
+ anon->u.an_nxt = uvm.afree;
+ uvm.afree = anon;
+ uvmexp.nfreeanon++;
+ simple_unlock(&uvm.afreelock);
+ UVMHIST_LOG(maphist,"<- done!",0,0,0,0);
+}
+
+/*
+ * uvm_anon_lockloanpg: given a locked anon, lock its resident page
+ *
+ * => anon is locked by caller
+ * => on return: anon is locked
+ * if there is a resident page:
+ * if it has a uobject, it is locked by us
+ * if it is ownerless, we take over as owner
+ * we return the resident page (it can change during
+ * this function)
+ * => note that the only time an anon has an ownerless resident page
+ * is if the page was loaned from a uvm_object and the uvm_object
+ * disowned it
+ * => this only needs to be called when you want to do an operation
+ * on an anon's resident page and that page has a non-zero loan
+ * count.
+ */
+struct vm_page *
+uvm_anon_lockloanpg(anon)
+ struct vm_anon *anon;
+{
+ struct vm_page *pg;
+ boolean_t locked = FALSE;
+
+ /*
+ * loop while we have a resident page that has a non-zero loan count.
+ * if we successfully get our lock, we will "break" the loop.
+ * note that the test for pg->loan_count is not protected -- this
+ * may produce false positive results. note that a false positive
+ * result may cause us to do more work than we need to, but it will
+ * not produce an incorrect result.
+ */
+
+ while (((pg = anon->u.an_page) != NULL) && pg->loan_count != 0) {
+
+ /*
+ * quickly check to see if the page has an object before
+ * bothering to lock the page queues. this may also produce
+ * a false positive result, but that's ok because we do a real
+ * check after that.
+ *
+ * XXX: quick check -- worth it? need volatile?
+ */
+
+ if (pg->uobject) {
+
+ uvm_lock_pageq();
+ if (pg->uobject) { /* the "real" check */
+ locked =
+ simple_lock_try(&pg->uobject->vmobjlock);
+ } else {
+ /* object disowned before we got PQ lock */
+ locked = TRUE;
+ }
+ uvm_unlock_pageq();
+
+ /*
+ * if we didn't get a lock (try lock failed), then we
+ * toggle our anon lock and try again
+ */
+
+ if (!locked) {
+ simple_unlock(&anon->an_lock);
+ /*
+ * someone locking the object has a chance to
+ * lock us right now
+ */
+ simple_lock(&anon->an_lock);
+ continue; /* start over */
+ }
+ }
+
+ /*
+ * if page is un-owned [i.e. the object dropped its ownership],
+ * then we can take over as owner!
+ */
+
+ if (pg->uobject == NULL && (pg->pqflags & PQ_ANON) == 0) {
+ uvm_lock_pageq();
+ pg->pqflags |= PQ_ANON; /* take ownership... */
+ pg->loan_count--; /* ... and drop our loan */
+ uvm_unlock_pageq();
+ }
+
+ /*
+ * we did it! break the loop
+ */
+ break;
+ }
+
+ /*
+ * done!
+ */
+
+ return(pg);
+}
diff --git a/sys/uvm/uvm_anon.h b/sys/uvm/uvm_anon.h
new file mode 100644
index 00000000000..f52f6f646f4
--- /dev/null
+++ b/sys/uvm/uvm_anon.h
@@ -0,0 +1,105 @@
+/* $NetBSD: uvm_anon.h,v 1.9 1999/01/24 23:53:15 chuck Exp $ */
+
+/*
+ *
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _UVM_UVM_ANON_H_
+#define _UVM_UVM_ANON_H_
+
+/*
+ * uvm_anon.h
+ */
+
+/*
+ * anonymous memory management
+ *
+ * anonymous virtual memory is short term virtual memory that goes away
+ * when the processes referencing it go away. an anonymous page of
+ * virtual memory is described by the following data structure:
+ */
+
+struct vm_anon {
+ int an_ref; /* reference count [an_lock] */
+ simple_lock_data_t an_lock; /* lock for an_ref */
+ union {
+ struct vm_anon *an_nxt; /* if on free list [afreelock] */
+ struct vm_page *an_page;/* if in RAM [an_lock] */
+ } u;
+ int an_swslot; /* drum swap slot # (if != 0)
+ [an_lock. also, it is ok to read
+ an_swslot if we hold an_page PG_BUSY] */
+};
+
+/*
+ * a pool of vm_anon data structures is allocated and put on a global
+ * free list at boot time. vm_anon's on the free list use "an_nxt" as
+ * a pointer to the next item on the free list. for active vm_anon's
+ * the data can be in one of the following states:  [1] in a vm_page
+ * with no backing store allocated yet, [2] in a vm_page with backing
+ * store allocated, or [3] paged out to backing store (no vm_page).
+ *
+ * for pageout in case [2]: if the page has been modified then we must
+ * flush it out to backing store, otherwise we can just dump the
+ * vm_page.
+ */
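+
+/*
+ * roughly, the state of an active anon can be told apart as follows
+ * (a sketch, not a locking protocol):
+ *
+ *	u.an_page != NULL, an_swslot == 0	case [1]: RAM only
+ *	u.an_page != NULL, an_swslot != 0	case [2]: RAM + backing store
+ *	u.an_page == NULL, an_swslot != 0	case [3]: paged out
+ */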
+
+/*
+ * anons are grouped together in anonymous memory maps, or amaps.
+ * amaps are defined in uvm_amap.h.
+ */
+
+/*
+ * processes reference anonymous virtual memory maps with an anonymous
+ * reference structure:
+ */
+
+struct vm_aref {
+ int ar_pageoff; /* page offset into amap we start */
+ struct vm_amap *ar_amap; /* pointer to amap */
+};
+
+/*
+ * the offset field indicates which part of the amap we are referencing.
+ * locked by vm_map lock.
+ */
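+
+/*
+ * e.g., a vm_aref with ar_pageoff == 2 means the first page of the
+ * map entry corresponds to slot 2 of the amap pointed to by ar_amap.
+ */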
+
+/*
+ * prototypes
+ */
+
+struct vm_anon *uvm_analloc __P((void));
+void uvm_anfree __P((struct vm_anon *));
+void uvm_anon_init __P((void));
+void uvm_anon_add __P((int));
+struct vm_page *uvm_anon_lockloanpg __P((struct vm_anon *));
+
+#endif /* _UVM_UVM_ANON_H_ */
diff --git a/sys/uvm/uvm_aobj.c b/sys/uvm/uvm_aobj.c
new file mode 100644
index 00000000000..8e0d3fc22ef
--- /dev/null
+++ b/sys/uvm/uvm_aobj.c
@@ -0,0 +1,1090 @@
+/* $NetBSD: uvm_aobj.c,v 1.15 1998/10/18 23:49:59 chs Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
+ * Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * from: Id: uvm_aobj.c,v 1.1.2.5 1998/02/06 05:14:38 chs Exp
+ */
+/*
+ * uvm_aobj.c: anonymous memory uvm_object pager
+ *
+ * author: Chuck Silvers <chuq@chuq.com>
+ * started: Jan-1998
+ *
+ * - design mostly from Chuck Cranor
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/pool.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_kern.h>
+
+#include <uvm/uvm.h>
+
+/*
+ * an aobj manages anonymous-memory backed uvm_objects. in addition
+ * to keeping the list of resident pages, it also keeps a list of
+ * allocated swap blocks. depending on the size of the aobj this list
+ * of allocated swap blocks is either stored in an array (small objects)
+ * or in a hash table (large objects).
+ */
+
+/*
+ * local structures
+ */
+
+/*
+ * for hash tables, we break the address space of the aobj into blocks
+ * of UAO_SWHASH_CLUSTER_SIZE pages. we require the cluster size to
+ * be a power of two.
+ */
+
+#define UAO_SWHASH_CLUSTER_SHIFT 4
+#define UAO_SWHASH_CLUSTER_SIZE (1 << UAO_SWHASH_CLUSTER_SHIFT)
+
+/* get the "tag" for this page index */
+#define UAO_SWHASH_ELT_TAG(PAGEIDX) \
+ ((PAGEIDX) >> UAO_SWHASH_CLUSTER_SHIFT)
+
+/* given an ELT and a page index, find the swap slot */
+#define UAO_SWHASH_ELT_PAGESLOT(ELT, PAGEIDX) \
+ ((ELT)->slots[(PAGEIDX) & (UAO_SWHASH_CLUSTER_SIZE - 1)])
+
+/* given an ELT, return its pageidx base */
+#define UAO_SWHASH_ELT_PAGEIDX_BASE(ELT) \
+ ((ELT)->tag << UAO_SWHASH_CLUSTER_SHIFT)
+
+/*
+ * the swhash hash function
+ */
+#define UAO_SWHASH_HASH(AOBJ, PAGEIDX) \
+ (&(AOBJ)->u_swhash[(((PAGEIDX) >> UAO_SWHASH_CLUSTER_SHIFT) \
+ & (AOBJ)->u_swhashmask)])
+
+/*
+ * the swhash threshold determines if we will use an array or a
+ * hash table to store the list of allocated swap blocks.
+ */
+
+#define UAO_SWHASH_THRESHOLD (UAO_SWHASH_CLUSTER_SIZE * 4)
+#define UAO_USES_SWHASH(AOBJ) \
+ ((AOBJ)->u_pages > UAO_SWHASH_THRESHOLD) /* use hash? */
+
+/*
+ * the number of buckets in a swhash, with an upper bound
+ */
+#define UAO_SWHASH_MAXBUCKETS 256
+#define UAO_SWHASH_BUCKETS(AOBJ) \
+ (min((AOBJ)->u_pages >> UAO_SWHASH_CLUSTER_SHIFT, \
+ UAO_SWHASH_MAXBUCKETS))
+
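+/*
+ * worked example of the macros above (with UAO_SWHASH_CLUSTER_SHIFT
+ * of 4): page index 0x123 falls in the cluster with tag 0x12 and uses
+ * slots[0x3] of that cluster's uao_swhash_elt; the element itself
+ * hangs off bucket u_swhash[0x12 & u_swhashmask].  an aobj switches
+ * from the flat u_swslots[] array to the hash only once it is larger
+ * than UAO_SWHASH_THRESHOLD (16 * 4 = 64) pages.
+ */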
+
+/*
+ * uao_swhash_elt: when a hash table is being used, this structure defines
+ * the format of an entry in the bucket list.
+ */
+
+struct uao_swhash_elt {
+ LIST_ENTRY(uao_swhash_elt) list; /* the hash list */
+ vaddr_t tag; /* our 'tag' */
+ int count; /* our number of active slots */
+ int slots[UAO_SWHASH_CLUSTER_SIZE]; /* the slots */
+};
+
+/*
+ * uao_swhash: the swap hash table structure
+ */
+
+LIST_HEAD(uao_swhash, uao_swhash_elt);
+
+/*
+ * uao_swhash_elt_pool: pool of uao_swhash_elt structures
+ */
+
+struct pool uao_swhash_elt_pool;
+
+/*
+ * uvm_aobj: the actual anon-backed uvm_object
+ *
+ * => the uvm_object is at the top of the structure, this allows
+ *   (struct uvm_aobj *) == (struct uvm_object *)
+ * => only one of u_swslots and u_swhash is used in any given aobj
+ */
+
+struct uvm_aobj {
+ struct uvm_object u_obj; /* has: lock, pgops, memq, #pages, #refs */
+ int u_pages; /* number of pages in entire object */
+ int u_flags; /* the flags (see uvm_aobj.h) */
+ int *u_swslots; /* array of offset->swapslot mappings */
+ /*
+ * hashtable of offset->swapslot mappings
+ * (u_swhash is an array of bucket heads)
+ */
+ struct uao_swhash *u_swhash;
+ u_long u_swhashmask; /* mask for hashtable */
+ LIST_ENTRY(uvm_aobj) u_list; /* global list of aobjs */
+};
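+
+/*
+ * space trade-off behind u_swslots vs. u_swhash: the flat array costs
+ * u_pages * sizeof(int) up front, which is fine for small objects,
+ * while the hash pays only for the buckets (at most
+ * UAO_SWHASH_MAXBUCKETS of them) plus one uao_swhash_elt per
+ * 16-page cluster that actually has swap slots allocated.
+ */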
+
+/*
+ * uvm_aobj_pool: pool of uvm_aobj structures
+ */
+
+struct pool uvm_aobj_pool;
+
+/*
+ * local functions
+ */
+
+static void uao_init __P((void));
+static struct uao_swhash_elt *uao_find_swhash_elt __P((struct uvm_aobj *,
+ int, boolean_t));
+static int uao_find_swslot __P((struct uvm_aobj *,
+ int));
+static boolean_t uao_flush __P((struct uvm_object *,
+ vaddr_t, vaddr_t,
+ int));
+static void uao_free __P((struct uvm_aobj *));
+static int uao_get __P((struct uvm_object *, vaddr_t,
+ vm_page_t *, int *, int,
+ vm_prot_t, int, int));
+static boolean_t uao_releasepg __P((struct vm_page *,
+ struct vm_page **));
+
+
+
+/*
+ * aobj_pager
+ *
+ * note that some functions (e.g. put) are handled elsewhere
+ */
+
+struct uvm_pagerops aobj_pager = {
+ uao_init, /* init */
+ NULL, /* attach */
+ uao_reference, /* reference */
+ uao_detach, /* detach */
+ NULL, /* fault */
+ uao_flush, /* flush */
+ uao_get, /* get */
+ NULL, /* asyncget */
+ NULL, /* put (done by pagedaemon) */
+ NULL, /* cluster */
+ NULL, /* mk_pcluster */
+ uvm_shareprot, /* shareprot */
+ NULL, /* aiodone */
+ uao_releasepg /* releasepg */
+};
+
+/*
+ * uao_list: global list of active aobjs, locked by uao_list_lock
+ */
+
+static LIST_HEAD(aobjlist, uvm_aobj) uao_list;
+static simple_lock_data_t uao_list_lock;
+
+
+/*
+ * functions
+ */
+
+/*
+ * hash table/array related functions
+ */
+
+/*
+ * uao_find_swhash_elt: find (or create) a hash table entry for a page
+ * offset.
+ *
+ * => the object should be locked by the caller
+ */
+
+static struct uao_swhash_elt *
+uao_find_swhash_elt(aobj, pageidx, create)
+ struct uvm_aobj *aobj;
+ int pageidx;
+ boolean_t create;
+{
+ struct uao_swhash *swhash;
+ struct uao_swhash_elt *elt;
+ int page_tag;
+
+ swhash = UAO_SWHASH_HASH(aobj, pageidx); /* first hash to get bucket */
+ page_tag = UAO_SWHASH_ELT_TAG(pageidx); /* tag to search for */
+
+ /*
+ * now search the bucket for the requested tag
+ */
+ for (elt = swhash->lh_first; elt != NULL; elt = elt->list.le_next) {
+ if (elt->tag == page_tag)
+ return(elt);
+ }
+
+ /* fail now if we are not allowed to create a new entry in the bucket */
+ if (!create)
+ return NULL;
+
+
+ /*
+ * allocate a new entry for the bucket and init/insert it in
+ */
+ elt = pool_get(&uao_swhash_elt_pool, PR_WAITOK);
+ LIST_INSERT_HEAD(swhash, elt, list);
+ elt->tag = page_tag;
+ elt->count = 0;
+ bzero(elt->slots, sizeof(elt->slots));
+
+ return(elt);
+}
+
+/*
+ * uao_find_swslot: find the swap slot number for an aobj/pageidx
+ *
+ * => object must be locked by caller
+ */
+__inline static int
+uao_find_swslot(aobj, pageidx)
+ struct uvm_aobj *aobj;
+ int pageidx;
+{
+
+ /*
+ * if noswap flag is set, then we never return a slot
+ */
+
+ if (aobj->u_flags & UAO_FLAG_NOSWAP)
+ return(0);
+
+ /*
+ * if hashing, look in hash table.
+ */
+
+ if (UAO_USES_SWHASH(aobj)) {
+ struct uao_swhash_elt *elt =
+ uao_find_swhash_elt(aobj, pageidx, FALSE);
+
+ if (elt)
+ return(UAO_SWHASH_ELT_PAGESLOT(elt, pageidx));
+ else
+			return(0);
+ }
+
+ /*
+ * otherwise, look in the array
+ */
+ return(aobj->u_swslots[pageidx]);
+}
+
+/*
+ * uao_set_swslot: set the swap slot for a page in an aobj.
+ *
+ * => setting a slot to zero frees the slot
+ * => object must be locked by caller
+ */
+int
+uao_set_swslot(uobj, pageidx, slot)
+ struct uvm_object *uobj;
+ int pageidx, slot;
+{
+ struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
+ int oldslot;
+ UVMHIST_FUNC("uao_set_swslot"); UVMHIST_CALLED(pdhist);
+ UVMHIST_LOG(pdhist, "aobj %p pageidx %d slot %d",
+ aobj, pageidx, slot, 0);
+
+ /*
+ * if noswap flag is set, then we can't set a slot
+ */
+
+ if (aobj->u_flags & UAO_FLAG_NOSWAP) {
+
+ if (slot == 0)
+ return(0); /* a clear is ok */
+
+ /* but a set is not */
+ printf("uao_set_swslot: uobj = %p\n", uobj);
+ panic("uao_set_swslot: attempt to set a slot on a NOSWAP object");
+ }
+
+ /*
+ * are we using a hash table? if so, add it in the hash.
+ */
+
+ if (UAO_USES_SWHASH(aobj)) {
+ /*
+ * Avoid allocating an entry just to free it again if
+		 * the page had no swap slot in the first place, and
+ * we are freeing.
+ */
+ struct uao_swhash_elt *elt =
+ uao_find_swhash_elt(aobj, pageidx, slot ? TRUE : FALSE);
+ if (elt == NULL) {
+#ifdef DIAGNOSTIC
+ if (slot)
+ panic("uao_set_swslot: didn't create elt");
+#endif
+ return (0);
+ }
+
+ oldslot = UAO_SWHASH_ELT_PAGESLOT(elt, pageidx);
+ UAO_SWHASH_ELT_PAGESLOT(elt, pageidx) = slot;
+
+ /*
+ * now adjust the elt's reference counter and free it if we've
+ * dropped it to zero.
+ */
+
+ /* an allocation? */
+ if (slot) {
+ if (oldslot == 0)
+ elt->count++;
+ } else { /* freeing slot ... */
+ if (oldslot) /* to be safe */
+ elt->count--;
+
+ if (elt->count == 0) {
+ LIST_REMOVE(elt, list);
+ pool_put(&uao_swhash_elt_pool, elt);
+ }
+ }
+
+ } else {
+ /* we are using an array */
+ oldslot = aobj->u_swslots[pageidx];
+ aobj->u_swslots[pageidx] = slot;
+ }
+ return (oldslot);
+}
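+
+/*
+ * usage note: uao_set_swslot() returns the previous slot so the caller
+ * can release it, e.g. (as done in uao_detach() and uao_releasepg()):
+ *
+ *	slot = uao_set_swslot(&aobj->u_obj, pg->offset >> PAGE_SHIFT, 0);
+ *	if (slot)
+ *		uvm_swap_free(slot, 1);
+ */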
+
+/*
+ * end of hash/array functions
+ */
+
+/*
+ * uao_free: free all resources held by an aobj, and then free the aobj
+ *
+ * => the aobj should be dead
+ */
+static void
+uao_free(aobj)
+ struct uvm_aobj *aobj;
+{
+
+ if (UAO_USES_SWHASH(aobj)) {
+ int i, hashbuckets = aobj->u_swhashmask + 1;
+
+ /*
+ * free the swslots from each hash bucket,
+ * then the hash bucket, and finally the hash table itself.
+ */
+ for (i = 0; i < hashbuckets; i++) {
+ struct uao_swhash_elt *elt, *next;
+
+ for (elt = aobj->u_swhash[i].lh_first; elt != NULL;
+ elt = next) {
+ int j;
+
+ for (j = 0; j < UAO_SWHASH_CLUSTER_SIZE; j++)
+ {
+ int slot = elt->slots[j];
+
+ if (slot)
+ uvm_swap_free(slot, 1);
+ }
+
+ next = elt->list.le_next;
+ pool_put(&uao_swhash_elt_pool, elt);
+ }
+ }
+ FREE(aobj->u_swhash, M_UVMAOBJ);
+ } else {
+ int i;
+
+ /*
+ * free the array
+ */
+
+ for (i = 0; i < aobj->u_pages; i++)
+ {
+ int slot = aobj->u_swslots[i];
+
+ if (slot)
+ uvm_swap_free(slot, 1);
+ }
+ FREE(aobj->u_swslots, M_UVMAOBJ);
+ }
+
+ /*
+ * finally free the aobj itself
+ */
+ pool_put(&uvm_aobj_pool, aobj);
+}
+
+/*
+ * pager functions
+ */
+
+/*
+ * uao_create: create an aobj of the given size and return its uvm_object.
+ *
+ * => for normal use, flags are always zero
+ * => for the kernel object, the flags are:
+ * UAO_FLAG_KERNOBJ - allocate the kernel object (can only happen once)
+ * UAO_FLAG_KERNSWAP - enable swapping of kernel object (" ")
+ */
+struct uvm_object *
+uao_create(size, flags)
+ vsize_t size;
+ int flags;
+{
+ static struct uvm_aobj kernel_object_store; /* home of kernel_object */
+ static int kobj_alloced = 0; /* not allocated yet */
+ int pages = round_page(size) >> PAGE_SHIFT;
+ struct uvm_aobj *aobj;
+
+ /*
+ * malloc a new aobj unless we are asked for the kernel object
+ */
+ if (flags & UAO_FLAG_KERNOBJ) { /* want kernel object? */
+ if (kobj_alloced)
+ panic("uao_create: kernel object already allocated");
+
+ /*
+ * XXXTHORPEJ: Need to call this now, so the pool gets
+ * initialized!
+ */
+ uao_init();
+
+ aobj = &kernel_object_store;
+ aobj->u_pages = pages;
+ aobj->u_flags = UAO_FLAG_NOSWAP; /* no swap to start */
+ /* we are special, we never die */
+ aobj->u_obj.uo_refs = UVM_OBJ_KERN;
+ kobj_alloced = UAO_FLAG_KERNOBJ;
+ } else if (flags & UAO_FLAG_KERNSWAP) {
+ aobj = &kernel_object_store;
+ if (kobj_alloced != UAO_FLAG_KERNOBJ)
+ panic("uao_create: asked to enable swap on kernel object");
+ kobj_alloced = UAO_FLAG_KERNSWAP;
+ } else { /* normal object */
+ aobj = pool_get(&uvm_aobj_pool, PR_WAITOK);
+ aobj->u_pages = pages;
+ aobj->u_flags = 0; /* normal object */
+ aobj->u_obj.uo_refs = 1; /* start with 1 reference */
+ }
+
+ /*
+ * allocate hash/array if necessary
+ *
+ * note: in the KERNSWAP case no need to worry about locking since
+	 * we are still booting and should be the only thread around.
+ */
+ if (flags == 0 || (flags & UAO_FLAG_KERNSWAP) != 0) {
+ int mflags = (flags & UAO_FLAG_KERNSWAP) != 0 ?
+ M_NOWAIT : M_WAITOK;
+
+ /* allocate hash table or array depending on object size */
+ if (UAO_USES_SWHASH(aobj)) {
+ aobj->u_swhash = newhashinit(UAO_SWHASH_BUCKETS(aobj),
+ M_UVMAOBJ, mflags, &aobj->u_swhashmask);
+ if (aobj->u_swhash == NULL)
+ panic("uao_create: hashinit swhash failed");
+ } else {
+ MALLOC(aobj->u_swslots, int *, pages * sizeof(int),
+ M_UVMAOBJ, mflags);
+ if (aobj->u_swslots == NULL)
+ panic("uao_create: malloc swslots failed");
+ bzero(aobj->u_swslots, pages * sizeof(int));
+ }
+
+ if (flags) {
+ aobj->u_flags &= ~UAO_FLAG_NOSWAP; /* clear noswap */
+ return(&aobj->u_obj);
+ /* done! */
+ }
+ }
+
+ /*
+ * init aobj fields
+ */
+ simple_lock_init(&aobj->u_obj.vmobjlock);
+ aobj->u_obj.pgops = &aobj_pager;
+ TAILQ_INIT(&aobj->u_obj.memq);
+ aobj->u_obj.uo_npages = 0;
+
+ /*
+ * now that aobj is ready, add it to the global list
+	 * XXXCHS: uao_init hasn't been called in the KERNOBJ case,
+ * do we really need the kernel object on this list anyway?
+ */
+ simple_lock(&uao_list_lock);
+ LIST_INSERT_HEAD(&uao_list, aobj, u_list);
+ simple_unlock(&uao_list_lock);
+
+ /*
+ * done!
+ */
+ return(&aobj->u_obj);
+}
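+
+/*
+ * usage sketch for the normal (non-kernel) case: a caller that wants
+ * an anonymous object simply does
+ *
+ *	uobj = uao_create(size, 0);
+ *	...
+ *	uao_detach(uobj);
+ *
+ * and lets uao_detach() release the pages, swap slots and the aobj
+ * itself once the last reference is dropped.
+ */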
+
+
+
+/*
+ * uao_init: set up aobj pager subsystem
+ *
+ * => called at boot time from uvm_pager_init()
+ */
+static void
+uao_init()
+{
+ static int uao_initialized;
+
+ if (uao_initialized)
+ return;
+ uao_initialized = TRUE;
+
+ LIST_INIT(&uao_list);
+ simple_lock_init(&uao_list_lock);
+
+ /*
+	 * NOTE: Pages for this pool must not come from a pageable
+ * kernel map!
+ */
+ pool_init(&uao_swhash_elt_pool, sizeof(struct uao_swhash_elt),
+ 0, 0, 0, "uaoeltpl", 0, NULL, NULL, M_UVMAOBJ);
+
+ pool_init(&uvm_aobj_pool, sizeof(struct uvm_aobj), 0, 0, 0,
+ "aobjpl", 0,
+ pool_page_alloc_nointr, pool_page_free_nointr, M_UVMAOBJ);
+}
+
+/*
+ * uao_reference: add a ref to an aobj
+ *
+ * => aobj must be unlocked (we will lock it)
+ */
+void
+uao_reference(uobj)
+ struct uvm_object *uobj;
+{
+ UVMHIST_FUNC("uao_reference"); UVMHIST_CALLED(maphist);
+
+ /*
+ * kernel_object already has plenty of references, leave it alone.
+ */
+
+ if (uobj->uo_refs == UVM_OBJ_KERN)
+ return;
+
+ simple_lock(&uobj->vmobjlock);
+ uobj->uo_refs++; /* bump! */
+ UVMHIST_LOG(maphist, "<- done (uobj=0x%x, ref = %d)",
+ uobj, uobj->uo_refs,0,0);
+ simple_unlock(&uobj->vmobjlock);
+}
+
+/*
+ * uao_detach: drop a reference to an aobj
+ *
+ * => aobj must be unlocked, we will lock it
+ */
+void
+uao_detach(uobj)
+ struct uvm_object *uobj;
+{
+ struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
+ struct vm_page *pg;
+ boolean_t busybody;
+ UVMHIST_FUNC("uao_detach"); UVMHIST_CALLED(maphist);
+
+ /*
+ * detaching from kernel_object is a noop.
+ */
+ if (uobj->uo_refs == UVM_OBJ_KERN)
+ return;
+
+ simple_lock(&uobj->vmobjlock);
+
+ UVMHIST_LOG(maphist," (uobj=0x%x) ref=%d", uobj,uobj->uo_refs,0,0);
+ uobj->uo_refs--; /* drop ref! */
+ if (uobj->uo_refs) { /* still more refs? */
+ simple_unlock(&uobj->vmobjlock);
+ UVMHIST_LOG(maphist, "<- done (rc>0)", 0,0,0,0);
+ return;
+ }
+
+ /*
+ * remove the aobj from the global list.
+ */
+ simple_lock(&uao_list_lock);
+ LIST_REMOVE(aobj, u_list);
+ simple_unlock(&uao_list_lock);
+
+ /*
+ * free all the pages that aren't PG_BUSY, mark for release any that are.
+ */
+
+ busybody = FALSE;
+ for (pg = uobj->memq.tqh_first ; pg != NULL ; pg = pg->listq.tqe_next) {
+ int swslot;
+
+ if (pg->flags & PG_BUSY) {
+ pg->flags |= PG_RELEASED;
+ busybody = TRUE;
+ continue;
+ }
+
+
+ /* zap the mappings, free the swap slot, free the page */
+ pmap_page_protect(PMAP_PGARG(pg), VM_PROT_NONE);
+
+ swslot = uao_set_swslot(&aobj->u_obj,
+ pg->offset >> PAGE_SHIFT, 0);
+ if (swslot) {
+ uvm_swap_free(swslot, 1);
+ }
+
+ uvm_lock_pageq();
+ uvm_pagefree(pg);
+ uvm_unlock_pageq();
+ }
+
+ /*
+ * if we found any busy pages, we're done for now.
+ * mark the aobj for death, releasepg will finish up for us.
+ */
+ if (busybody) {
+ aobj->u_flags |= UAO_FLAG_KILLME;
+ simple_unlock(&aobj->u_obj.vmobjlock);
+ return;
+ }
+
+ /*
+ * finally, free the rest.
+ */
+ uao_free(aobj);
+}
+
+/*
+ * uao_flush: uh, yea, sure it's flushed. really!
+ */
+boolean_t
+uao_flush(uobj, start, end, flags)
+ struct uvm_object *uobj;
+ vaddr_t start, end;
+ int flags;
+{
+
+ /*
+ * anonymous memory doesn't "flush"
+ */
+ /*
+ * XXX
+ * deal with PGO_DEACTIVATE (for madvise(MADV_SEQUENTIAL))
+ * and PGO_FREE (for msync(MSINVALIDATE))
+ */
+ return TRUE;
+}
+
+/*
+ * uao_get: fetch me a page
+ *
+ * we have three cases:
+ * 1: page is resident -> just return the page.
+ * 2: page is zero-fill -> allocate a new page and zero it.
+ * 3: page is swapped out -> fetch the page from swap.
+ *
+ * cases 1 and 2 can be handled with PGO_LOCKED, case 3 cannot.
+ * so, if the "center" page hits case 3 (or any page, with PGO_ALLPAGES),
+ * then we will need to return VM_PAGER_UNLOCK.
+ *
+ * => prefer map unlocked (not required)
+ * => object must be locked! we will _unlock_ it before starting any I/O.
+ * => flags: PGO_ALLPAGES: get all of the pages
+ * PGO_LOCKED: fault data structures are locked
+ * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx]
+ * => NOTE: caller must check for released pages!!
+ */
+static int
+uao_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags)
+ struct uvm_object *uobj;
+ vaddr_t offset;
+ struct vm_page **pps;
+ int *npagesp;
+ int centeridx, advice, flags;
+ vm_prot_t access_type;
+{
+ struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
+ vaddr_t current_offset;
+ vm_page_t ptmp;
+ int lcv, gotpages, maxpages, swslot, rv;
+ boolean_t done;
+ UVMHIST_FUNC("uao_get"); UVMHIST_CALLED(pdhist);
+
+ UVMHIST_LOG(pdhist, "aobj=%p offset=%d, flags=%d", aobj, offset, flags,0);
+
+ /*
+ * get number of pages
+ */
+
+ maxpages = *npagesp;
+
+ /*
+	 * step 1: handle the case where fault data structures are locked.
+ */
+
+ if (flags & PGO_LOCKED) {
+
+ /*
+ * step 1a: get pages that are already resident. only do
+ * this if the data structures are locked (i.e. the first
+ * time through).
+ */
+
+ done = TRUE; /* be optimistic */
+ gotpages = 0; /* # of pages we got so far */
+
+ for (lcv = 0, current_offset = offset ; lcv < maxpages ;
+ lcv++, current_offset += PAGE_SIZE) {
+ /* do we care about this page? if not, skip it */
+ if (pps[lcv] == PGO_DONTCARE)
+ continue;
+
+ ptmp = uvm_pagelookup(uobj, current_offset);
+
+ /*
+ * if page is new, attempt to allocate the page, then
+ * zero-fill it.
+ */
+ if (ptmp == NULL && uao_find_swslot(aobj,
+ current_offset >> PAGE_SHIFT) == 0) {
+ ptmp = uvm_pagealloc(uobj, current_offset,
+ NULL);
+ if (ptmp) {
+ /* new page */
+ ptmp->flags &= ~(PG_BUSY|PG_FAKE);
+ ptmp->pqflags |= PQ_AOBJ;
+ UVM_PAGE_OWN(ptmp, NULL);
+ uvm_pagezero(ptmp);
+ }
+ }
+
+ /*
+ * to be useful must get a non-busy, non-released page
+ */
+ if (ptmp == NULL ||
+ (ptmp->flags & (PG_BUSY|PG_RELEASED)) != 0) {
+ if (lcv == centeridx ||
+ (flags & PGO_ALLPAGES) != 0)
+ /* need to do a wait or I/O! */
+ done = FALSE;
+ continue;
+ }
+
+ /*
+ * useful page: busy/lock it and plug it in our
+ * result array
+ */
+ /* caller must un-busy this page */
+ ptmp->flags |= PG_BUSY;
+ UVM_PAGE_OWN(ptmp, "uao_get1");
+ pps[lcv] = ptmp;
+ gotpages++;
+
+ } /* "for" lcv loop */
+
+ /*
+		 * step 1b: now we've either done everything needed or we
+		 * need to unlock and do some waiting or I/O.
+ */
+
+ UVMHIST_LOG(pdhist, "<- done (done=%d)", done, 0,0,0);
+
+ *npagesp = gotpages;
+ if (done)
+ /* bingo! */
+ return(VM_PAGER_OK);
+ else
+ /* EEK! Need to unlock and I/O */
+ return(VM_PAGER_UNLOCK);
+ }
+
+ /*
+ * step 2: get non-resident or busy pages.
+ * object is locked. data structures are unlocked.
+ */
+
+ for (lcv = 0, current_offset = offset ; lcv < maxpages ;
+ lcv++, current_offset += PAGE_SIZE) {
+ /*
+ * - skip over pages we've already gotten or don't want
+ * - skip over pages we don't _have_ to get
+ */
+ if (pps[lcv] != NULL ||
+ (lcv != centeridx && (flags & PGO_ALLPAGES) == 0))
+ continue;
+
+ /*
+ * we have yet to locate the current page (pps[lcv]). we
+ * first look for a page that is already at the current offset.
+ * if we find a page, we check to see if it is busy or
+ * released. if that is the case, then we sleep on the page
+ * until it is no longer busy or released and repeat the lookup.
+ * if the page we found is neither busy nor released, then we
+ * busy it (so we own it) and plug it into pps[lcv]. this
+ * 'break's the following while loop and indicates we are
+ * ready to move on to the next page in the "lcv" loop above.
+ *
+ * if we exit the while loop with pps[lcv] still set to NULL,
+ * then it means that we allocated a new busy/fake/clean page
+ * ptmp in the object and we need to do I/O to fill in the data.
+ */
+
+ /* top of "pps" while loop */
+ while (pps[lcv] == NULL) {
+ /* look for a resident page */
+ ptmp = uvm_pagelookup(uobj, current_offset);
+
+ /* not resident? allocate one now (if we can) */
+ if (ptmp == NULL) {
+
+ ptmp = uvm_pagealloc(uobj, current_offset,
+ NULL); /* alloc */
+
+ /* out of RAM? */
+ if (ptmp == NULL) {
+ simple_unlock(&uobj->vmobjlock);
+ UVMHIST_LOG(pdhist,
+ "sleeping, ptmp == NULL\n",0,0,0,0);
+ uvm_wait("uao_getpage");
+ simple_lock(&uobj->vmobjlock);
+ /* goto top of pps while loop */
+ continue;
+ }
+
+ /*
+ * safe with PQ's unlocked: because we just
+ * alloc'd the page
+ */
+ ptmp->pqflags |= PQ_AOBJ;
+
+ /*
+ * got new page ready for I/O. break pps while
+ * loop. pps[lcv] is still NULL.
+ */
+ break;
+ }
+
+ /* page is there, see if we need to wait on it */
+ if ((ptmp->flags & (PG_BUSY|PG_RELEASED)) != 0) {
+ ptmp->flags |= PG_WANTED;
+ UVMHIST_LOG(pdhist,
+ "sleeping, ptmp->flags 0x%x\n",
+ ptmp->flags,0,0,0);
+ UVM_UNLOCK_AND_WAIT(ptmp, &uobj->vmobjlock, 0,
+ "uao_get", 0);
+ simple_lock(&uobj->vmobjlock);
+ continue; /* goto top of pps while loop */
+ }
+
+ /*
+ * if we get here then the page has become resident and
+ * unbusy between steps 1 and 2. we busy it now (so we
+ * own it) and set pps[lcv] (so that we exit the while
+ * loop).
+ */
+ /* we own it, caller must un-busy */
+ ptmp->flags |= PG_BUSY;
+ UVM_PAGE_OWN(ptmp, "uao_get2");
+ pps[lcv] = ptmp;
+ }
+
+ /*
+ * if we own the valid page at the correct offset, pps[lcv] will
+ * point to it. nothing more to do except go to the next page.
+ */
+ if (pps[lcv])
+ continue; /* next lcv */
+
+ /*
+ * we have a "fake/busy/clean" page that we just allocated.
+ * do the needed "i/o", either reading from swap or zeroing.
+ */
+ swslot = uao_find_swslot(aobj, current_offset >> PAGE_SHIFT);
+
+ /*
+ * just zero the page if there's nothing in swap.
+ */
+ if (swslot == 0)
+ {
+ /*
+ * page hasn't existed before, just zero it.
+ */
+ uvm_pagezero(ptmp);
+ }
+ else
+ {
+ UVMHIST_LOG(pdhist, "pagein from swslot %d",
+ swslot, 0,0,0);
+
+ /*
+ * page in the swapped-out page.
+ * unlock object for i/o, relock when done.
+ */
+ simple_unlock(&uobj->vmobjlock);
+ rv = uvm_swap_get(ptmp, swslot, PGO_SYNCIO);
+ simple_lock(&uobj->vmobjlock);
+
+ /*
+ * I/O done. check for errors.
+ */
+ if (rv != VM_PAGER_OK)
+ {
+ UVMHIST_LOG(pdhist, "<- done (error=%d)",
+ rv,0,0,0);
+ if (ptmp->flags & PG_WANTED)
+ /* object lock still held */
+ thread_wakeup(ptmp);
+ ptmp->flags &= ~(PG_WANTED|PG_BUSY);
+ UVM_PAGE_OWN(ptmp, NULL);
+ uvm_lock_pageq();
+ uvm_pagefree(ptmp);
+ uvm_unlock_pageq();
+ simple_unlock(&uobj->vmobjlock);
+ return (rv);
+ }
+ }
+
+ /*
+ * we got the page! clear the fake flag (indicates valid
+ * data now in page) and plug into our result array. note
+ * that page is still busy.
+ *
+		 * it is the caller's job to:
+ * => check if the page is released
+ * => unbusy the page
+ * => activate the page
+ */
+
+ ptmp->flags &= ~PG_FAKE; /* data is valid ... */
+ pmap_clear_modify(PMAP_PGARG(ptmp)); /* ... and clean */
+ pps[lcv] = ptmp;
+
+ } /* lcv loop */
+
+ /*
+ * finally, unlock object and return.
+ */
+
+ simple_unlock(&uobj->vmobjlock);
+ UVMHIST_LOG(pdhist, "<- done (OK)",0,0,0,0);
+ return(VM_PAGER_OK);
+}
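+
+/*
+ * rough sketch of how the two-step protocol above is driven: the
+ * caller first asks for the page with PGO_LOCKED set while the fault
+ * data structures are held; if the page is resident (case 1) or can be
+ * zero-filled on the spot (case 2) it comes back immediately,
+ * otherwise VM_PAGER_UNLOCK is returned, the caller drops its locks,
+ * and a second call without PGO_LOCKED runs step 2, which may sleep on
+ * busy pages or do synchronous swap I/O.
+ */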
+
+/*
+ * uao_releasepg: handle released page in an aobj
+ *
+ * => "pg" is a PG_BUSY [caller owns it], PG_RELEASED page that we need
+ * to dispose of.
+ * => caller must handle PG_WANTED case
+ * => called with page's object locked, pageq's unlocked
+ * => returns TRUE if page's object is still alive, FALSE if we
+ * killed the page's object. if we return TRUE, then we
+ * return with the object locked.
+ * => if (nextpgp != NULL) => we return pageq.tqe_next here, and return
+ * with the page queues locked [for pagedaemon]
+ * => if (nextpgp == NULL) => we return with page queues unlocked [normal case]
+ * => we kill the aobj if it is not referenced and we are supposed to
+ * kill it ("KILLME").
+ */
+static boolean_t
+uao_releasepg(pg, nextpgp)
+ struct vm_page *pg;
+ struct vm_page **nextpgp; /* OUT */
+{
+ struct uvm_aobj *aobj = (struct uvm_aobj *) pg->uobject;
+ int slot;
+
+#ifdef DIAGNOSTIC
+ if ((pg->flags & PG_RELEASED) == 0)
+ panic("uao_releasepg: page not released!");
+#endif
+
+ /*
+ * dispose of the page [caller handles PG_WANTED] and swap slot.
+ */
+ pmap_page_protect(PMAP_PGARG(pg), VM_PROT_NONE);
+ slot = uao_set_swslot(&aobj->u_obj, pg->offset >> PAGE_SHIFT, 0);
+ if (slot)
+ uvm_swap_free(slot, 1);
+ uvm_lock_pageq();
+ if (nextpgp)
+ *nextpgp = pg->pageq.tqe_next; /* next page for daemon */
+ uvm_pagefree(pg);
+ if (!nextpgp)
+ uvm_unlock_pageq(); /* keep locked for daemon */
+
+ /*
+ * if we're not killing the object, we're done.
+ */
+ if ((aobj->u_flags & UAO_FLAG_KILLME) == 0)
+ return TRUE;
+
+#ifdef DIAGNOSTIC
+ if (aobj->u_obj.uo_refs)
+		panic("uao_releasepg: kill flag set on referenced object!");
+#endif
+
+ /*
+ * if there are still pages in the object, we're done for now.
+ */
+ if (aobj->u_obj.uo_npages != 0)
+ return TRUE;
+
+#ifdef DIAGNOSTIC
+ if (aobj->u_obj.memq.tqh_first)
+		panic("uao_releasepg: pages in object with npages == 0");
+#endif
+
+ /*
+ * finally, free the rest.
+ */
+ uao_free(aobj);
+
+ return FALSE;
+}
diff --git a/sys/uvm/uvm_aobj.h b/sys/uvm/uvm_aobj.h
new file mode 100644
index 00000000000..61beadb2157
--- /dev/null
+++ b/sys/uvm/uvm_aobj.h
@@ -0,0 +1,77 @@
+/* $NetBSD: uvm_aobj.h,v 1.6 1998/02/12 07:36:45 chs Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
+ * Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * from: Id: uvm_aobj.h,v 1.1.2.4 1998/02/06 05:19:28 chs Exp
+ */
+/*
+ * uvm_aobj.h: anonymous memory uvm_object pager
+ *
+ * author: Chuck Silvers <chuq@chuq.com>
+ * started: Jan-1998
+ *
+ * - design mostly from Chuck Cranor
+ */
+
+#ifndef _UVM_UVM_AOBJ_H_
+#define _UVM_UVM_AOBJ_H_
+
+/*
+ * flags
+ */
+
+/* flags for uao_create: can only be used one time (at bootup) */
+#define UAO_FLAG_KERNOBJ 0x1 /* create kernel object */
+#define UAO_FLAG_KERNSWAP 0x2 /* enable kernel swap */
+
+/* internal flags */
+#define UAO_FLAG_KILLME 0x4 /* aobj should die when last released
+ * page is no longer PG_BUSY ... */
+#define UAO_FLAG_NOSWAP 0x8 /* aobj can't swap (kernel obj only!) */
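+
+/*
+ * e.g., the kernel object is created exactly once at boot by passing
+ * UAO_FLAG_KERNOBJ (it starts out with UAO_FLAG_NOSWAP set); a later
+ * uao_create() call with UAO_FLAG_KERNSWAP clears NOSWAP and enables
+ * swapping of the kernel object.  all other callers pass a flags
+ * value of zero.
+ */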
+
+/*
+ * prototypes
+ */
+
+int uao_set_swslot __P((struct uvm_object *, int, int));
+
+/*
+ * globals
+ */
+
+extern struct uvm_pagerops aobj_pager;
+
+#endif /* _UVM_UVM_AOBJ_H_ */
diff --git a/sys/uvm/uvm_ddb.h b/sys/uvm/uvm_ddb.h
new file mode 100644
index 00000000000..7c82bdf0dd6
--- /dev/null
+++ b/sys/uvm/uvm_ddb.h
@@ -0,0 +1,56 @@
+/* $NetBSD: uvm_ddb.h,v 1.1 1998/07/04 22:18:53 jonathan Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ *
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * from: Id: uvm_extern.h,v 1.1.2.21 1998/02/07 01:16:53 chs Exp
+ */
+
+#ifndef _UVM_UVM_DDB_H_
+#define _UVM_UVM_DDB_H_
+
+#if defined(DDB)
+void uvm_map_print __P((vm_map_t, boolean_t));
+void uvm_map_printit __P((vm_map_t, boolean_t,
+ int (*) __P((const char *, ...))));
+
+void uvm_object_print __P((struct uvm_object *, boolean_t));
+void uvm_object_printit __P((struct uvm_object *, boolean_t,
+ int (*) __P((const char *, ...))));
+void uvm_page_print __P((struct vm_page *, boolean_t));
+void uvm_page_printit __P((struct vm_page *, boolean_t,
+ int (*) __P((const char *, ...))));
+#endif
+#endif /* _UVM_UVM_DDB_H_ */
diff --git a/sys/uvm/uvm_device.c b/sys/uvm/uvm_device.c
new file mode 100644
index 00000000000..6c249c42877
--- /dev/null
+++ b/sys/uvm/uvm_device.c
@@ -0,0 +1,507 @@
+/* $NetBSD: uvm_device.c,v 1.11 1998/11/19 05:23:26 mrg Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+
+/*
+ *
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * from: Id: uvm_device.c,v 1.1.2.9 1998/02/06 05:11:47 chs Exp
+ */
+
+/*
+ * uvm_device.c: the device pager.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/vnode.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_kern.h>
+
+#include <uvm/uvm.h>
+#include <uvm/uvm_device.h>
+
+/*
+ * private global data structure
+ *
+ * we keep a list of active device objects in the system.
+ */
+
+LIST_HEAD(udv_list_struct, uvm_device);
+static struct udv_list_struct udv_list;
+static simple_lock_data_t udv_lock;
+
+/*
+ * functions
+ */
+
+static void udv_init __P((void));
+struct uvm_object *udv_attach __P((void *, vm_prot_t));
+static void udv_reference __P((struct uvm_object *));
+static void udv_detach __P((struct uvm_object *));
+static int udv_fault __P((struct uvm_faultinfo *, vaddr_t,
+ vm_page_t *, int, int, vm_fault_t,
+ vm_prot_t, int));
+static boolean_t udv_flush __P((struct uvm_object *, vaddr_t,
+ vaddr_t, int));
+static int udv_asyncget __P((struct uvm_object *, vaddr_t,
+ int));
+static int udv_put __P((struct uvm_object *, vm_page_t *,
+ int, boolean_t));
+
+/*
+ * master pager structure
+ */
+
+struct uvm_pagerops uvm_deviceops = {
+ udv_init,
+ udv_attach,
+ udv_reference,
+ udv_detach,
+ udv_fault,
+ udv_flush,
+ NULL, /* no get function since we have udv_fault */
+ udv_asyncget,
+ udv_put,
+ NULL, /* no cluster function */
+ NULL, /* no put cluster function */
+ NULL, /* no share protect. no share maps for us */
+ NULL, /* no AIO-DONE function since no async i/o */
+ NULL, /* no releasepg function since no normal pages */
+};
+
+/*
+ * the ops!
+ */
+
+/*
+ * udv_init
+ *
+ * init pager private data structures.
+ */
+
+void
+udv_init()
+{
+
+ LIST_INIT(&udv_list);
+ simple_lock_init(&udv_lock);
+}
+
+/*
+ * udv_attach
+ *
+ * get a VM object that is associated with a device. allocate a new
+ * one if needed.
+ *
+ * => caller must _not_ already be holding the lock on the uvm_object.
+ * => in fact, nothing should be locked so that we can sleep here.
+ */
+struct uvm_object *
+udv_attach(arg, accessprot)
+ void *arg;
+ vm_prot_t accessprot;
+{
+ dev_t device = *((dev_t *) arg);
+ struct uvm_device *udv, *lcv;
+ int (*mapfn) __P((dev_t, int, int));
+ UVMHIST_FUNC("udv_attach"); UVMHIST_CALLED(maphist);
+
+ UVMHIST_LOG(maphist, "(device=0x%x)", device,0,0,0);
+
+ /*
+ * before we do anything, ensure this device supports mmap
+ */
+
+ mapfn = cdevsw[major(device)].d_mmap;
+ if (mapfn == NULL ||
+ mapfn == (int (*) __P((dev_t, int, int))) enodev ||
+ mapfn == (int (*) __P((dev_t, int, int))) nullop)
+ return(NULL);
+
+ /*
+ * keep looping until we get it
+ */
+
+ while (1) {
+
+ /*
+ * first, attempt to find it on the main list
+ */
+
+ simple_lock(&udv_lock);
+		for (lcv = udv_list.lh_first ; lcv != NULL ;
+		    lcv = lcv->u_list.le_next) {
+ if (device == lcv->u_device)
+ break;
+ }
+
+ /*
+ * got it on main list. put a hold on it and unlock udv_lock.
+ */
+
+ if (lcv) {
+
+ /*
+ * if someone else has a hold on it, sleep and start
+ * over again.
+ */
+
+ if (lcv->u_flags & UVM_DEVICE_HOLD) {
+ lcv->u_flags |= UVM_DEVICE_WANTED;
+ UVM_UNLOCK_AND_WAIT(lcv, &udv_lock, FALSE,
+ "udv_attach",0);
+ continue;
+ }
+
+ /* we are now holding it */
+ lcv->u_flags |= UVM_DEVICE_HOLD;
+ simple_unlock(&udv_lock);
+
+ /*
+ * bump reference count, unhold, return.
+ */
+
+ simple_lock(&lcv->u_obj.vmobjlock);
+ lcv->u_obj.uo_refs++;
+ simple_unlock(&lcv->u_obj.vmobjlock);
+
+ simple_lock(&udv_lock);
+ if (lcv->u_flags & UVM_DEVICE_WANTED)
+ wakeup(lcv);
+ lcv->u_flags &= ~(UVM_DEVICE_WANTED|UVM_DEVICE_HOLD);
+ simple_unlock(&udv_lock);
+ return(&lcv->u_obj);
+ }
+
+ /*
+ * did not find it on main list. need to malloc a new one.
+ */
+
+ simple_unlock(&udv_lock);
+ /* NOTE: we could sleep in the following malloc() */
+ MALLOC(udv, struct uvm_device *, sizeof(*udv), M_TEMP, M_WAITOK);
+ simple_lock(&udv_lock);
+
+ /*
+ * now we have to double check to make sure no one added it
+ * to the list while we were sleeping...
+ */
+
+ for (lcv = udv_list.lh_first ; lcv != NULL ;
+ lcv = lcv->u_list.le_next) {
+ if (device == lcv->u_device)
+ break;
+ }
+
+ /*
+ * did we lose a race to someone else? free our memory and retry.
+ */
+
+ if (lcv) {
+ simple_unlock(&udv_lock);
+ FREE(udv, M_TEMP);
+ continue;
+ }
+
+ /*
+ * we have it! init the data structures, add to list
+ * and return.
+ */
+
+ simple_lock_init(&udv->u_obj.vmobjlock);
+ udv->u_obj.pgops = &uvm_deviceops;
+ TAILQ_INIT(&udv->u_obj.memq); /* not used, but be safe */
+ udv->u_obj.uo_npages = 0;
+ udv->u_obj.uo_refs = 1;
+ udv->u_flags = 0;
+ udv->u_device = device;
+ LIST_INSERT_HEAD(&udv_list, udv, u_list);
+ simple_unlock(&udv_lock);
+
+ return(&udv->u_obj);
+
+ } /* while(1) loop */
+
+ /*NOTREACHED*/
+}
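+
+/*
+ * note on the UVM_DEVICE_HOLD/WANTED handshake above: HOLD keeps the
+ * udv from being torn down by udv_detach() while we temporarily drop
+ * udv_lock to take the object lock, and WANTED records that someone
+ * is sleeping on the udv and must be woken up when the hold (or the
+ * udv itself) goes away.
+ */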
+
+/*
+ * udv_reference
+ *
+ * add a reference to a VM object. Note that the reference count must
+ * already be one (the passed in reference) so there is no chance of the
+ * udv being released or locked out here.
+ *
+ * => caller must call with object unlocked.
+ */
+
+static void
+udv_reference(uobj)
+ struct uvm_object *uobj;
+{
+ UVMHIST_FUNC("udv_reference"); UVMHIST_CALLED(maphist);
+
+ simple_lock(&uobj->vmobjlock);
+ uobj->uo_refs++;
+ UVMHIST_LOG(maphist, "<- done (uobj=0x%x, ref = %d)",
+ uobj, uobj->uo_refs,0,0);
+ simple_unlock(&uobj->vmobjlock);
+}
+
+/*
+ * udv_detach
+ *
+ * remove a reference to a VM object.
+ *
+ * => caller must call with object unlocked and map locked.
+ */
+
+static void
+udv_detach(uobj)
+ struct uvm_object *uobj;
+{
+ struct uvm_device *udv = (struct uvm_device *) uobj;
+ UVMHIST_FUNC("udv_detach"); UVMHIST_CALLED(maphist);
+
+ /*
+ * loop until done
+ */
+
+ while (1) {
+ simple_lock(&uobj->vmobjlock);
+
+ if (uobj->uo_refs > 1) {
+ uobj->uo_refs--; /* drop ref! */
+ simple_unlock(&uobj->vmobjlock);
+ UVMHIST_LOG(maphist," <- done, uobj=0x%x, ref=%d",
+ uobj,uobj->uo_refs,0,0);
+ return;
+ }
+
+#ifdef DIAGNOSTIC
+ if (uobj->uo_npages || uobj->memq.tqh_first)
+ panic("udv_detach: pages in a device object?");
+#endif
+
+ /*
+ * now lock udv_lock
+ */
+ simple_lock(&udv_lock);
+
+ /*
+ * is it being held? if so, wait until others are done.
+ */
+ if (udv->u_flags & UVM_DEVICE_HOLD) {
+
+ /*
+ * want it
+ */
+ udv->u_flags |= UVM_DEVICE_WANTED;
+ simple_unlock(&uobj->vmobjlock);
+ UVM_UNLOCK_AND_WAIT(udv, &udv_lock, FALSE, "udv_detach",0);
+ continue;
+ }
+
+ /*
+ * got it! nuke it now.
+ */
+
+ LIST_REMOVE(udv, u_list);
+ if (udv->u_flags & UVM_DEVICE_WANTED)
+ wakeup(udv);
+ FREE(udv, M_TEMP);
+ break; /* DONE! */
+
+ } /* while (1) loop */
+
+ UVMHIST_LOG(maphist," <- done, freed uobj=0x%x", uobj,0,0,0);
+ return;
+}
+
+
+/*
+ * udv_flush
+ *
+ * flush pages out of a uvm object. a no-op for devices.
+ */
+
+static boolean_t
+udv_flush(uobj, start, stop, flags)
+ struct uvm_object *uobj;
+ vaddr_t start, stop;
+ int flags;
+{
+
+ return(TRUE);
+}
+
+/*
+ * udv_fault: non-standard fault routine for device "pages"
+ *
+ * => rather than having a "get" function, we have a fault routine
+ * since we don't return vm_pages we need full control over the
+ *	since we don't return vm_pages, we need full control over the
+ *	pmap_enter map-in
+ * (i.e. maps(read), amap (if any), uobj)
+ * => on return, we unlock all fault data structures
+ * => flags: PGO_ALLPAGES: get all of the pages
+ * PGO_LOCKED: fault data structures are locked
+ * XXX: currently PGO_LOCKED is always required ... consider removing
+ * it as a flag
+ * => NOTE: vaddr is the VA of pps[0] in ufi->entry, _NOT_ pps[centeridx]
+ */
+
+static int
+udv_fault(ufi, vaddr, pps, npages, centeridx, fault_type, access_type, flags)
+ struct uvm_faultinfo *ufi;
+ vaddr_t vaddr;
+ vm_page_t *pps;
+ int npages, centeridx, flags;
+ vm_fault_t fault_type;
+ vm_prot_t access_type;
+{
+ struct vm_map_entry *entry = ufi->entry;
+ struct uvm_object *uobj = entry->object.uvm_obj;
+ struct uvm_device *udv = (struct uvm_device *)uobj;
+ vaddr_t curr_offset, curr_va;
+ paddr_t paddr;
+ int lcv, retval, mdpgno;
+ dev_t device;
+ int (*mapfn) __P((dev_t, int, int));
+ UVMHIST_FUNC("udv_fault"); UVMHIST_CALLED(maphist);
+ UVMHIST_LOG(maphist," flags=%d", flags,0,0,0);
+
+ /*
+ * XXX: !PGO_LOCKED calls are currently not allowed (or used)
+ */
+
+ if ((flags & PGO_LOCKED) == 0)
+ panic("udv_fault: !PGO_LOCKED fault");
+
+ /*
+ * we do not allow device mappings to be mapped copy-on-write
+ * so we kill any attempt to do so here.
+ */
+
+ if (UVM_ET_ISCOPYONWRITE(entry)) {
+ UVMHIST_LOG(maphist, "<- failed -- COW entry (etype=0x%x)",
+ entry->etype, 0,0,0);
+ uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj, NULL);
+ return(VM_PAGER_ERROR);
+ }
+
+ /*
+ * get device map function.
+ */
+ device = udv->u_device;
+ mapfn = cdevsw[major(device)].d_mmap;
+
+ /*
+ * now we must determine the offset in udv to use and the VA to
+ * use for pmap_enter. note that we always use orig_map's pmap
+ * for pmap_enter (even if we have a submap). since virtual
+ * addresses in a submap must match the main map, this is ok.
+ */
+ /* udv offset = (offset from start of entry) + entry's offset */
+ curr_offset = (vaddr - entry->start) + entry->offset;
+ /* pmap va = vaddr (virtual address of pps[0]) */
+ curr_va = vaddr;
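+
+ /*
+ * worked example (hypothetical numbers): if entry->start == 0x2000,
+ * entry->offset == 0x1000 and the fault is at vaddr == 0x5000, then
+ *     curr_offset = (0x5000 - 0x2000) + 0x1000 = 0x4000
+ * so d_mmap is asked for the page 0x4000 bytes into the device and the
+ * result is entered at va 0x5000 in orig_map's pmap.
+ */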
+
+ /*
+ * loop over the page range entering in as needed
+ */
+
+ retval = VM_PAGER_OK;
+ for (lcv = 0 ; lcv < npages ; lcv++, curr_offset += PAGE_SIZE,
+ curr_va += PAGE_SIZE) {
+ if ((flags & PGO_ALLPAGES) == 0 && lcv != centeridx)
+ continue;
+
+ if (pps[lcv] == PGO_DONTCARE)
+ continue;
+
+ mdpgno = (*mapfn)(device, (int)curr_offset, access_type);
+ if (mdpgno == -1) {
+ retval = VM_PAGER_ERROR;
+ break;
+ }
+ paddr = pmap_phys_address(mdpgno);
+ UVMHIST_LOG(maphist,
+ " MAPPING: device: pm=0x%x, va=0x%x, pa=0x%x, at=%d",
+ ufi->orig_map->pmap, curr_va, (int)paddr, access_type);
+ pmap_enter(ufi->orig_map->pmap, curr_va, paddr, access_type, 0);
+
+ }
+
+ uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap, uobj, NULL);
+ return(retval);
+}
+
+/*
+ * udv_asyncget: start async I/O to bring pages into ram
+ *
+ * => caller must lock object(???XXX: see if this is best)
+ * => a no-op for devices
+ */
+
+static int
+udv_asyncget(uobj, offset, npages)
+ struct uvm_object *uobj;
+ vaddr_t offset;
+ int npages;
+{
+
+ return(KERN_SUCCESS);
+}
+
+/*
+ * udv_put: flush page data to backing store.
+ *
+ * => this function should never be called (since we never have any
+ * page structures to "put")
+ */
+
+static int
+udv_put(uobj, pps, npages, flags)
+ struct uvm_object *uobj;
+ struct vm_page **pps;
+ int npages, flags;
+{
+
+ panic("udv_put: trying to page out to a device!");
+}
diff --git a/sys/uvm/uvm_device.h b/sys/uvm/uvm_device.h
new file mode 100644
index 00000000000..347e4cb1dac
--- /dev/null
+++ b/sys/uvm/uvm_device.h
@@ -0,0 +1,76 @@
+/* $NetBSD: uvm_device.h,v 1.5 1998/03/09 00:58:56 mrg Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ *
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * from: Id: uvm_device.h,v 1.1.2.2 1997/10/03 17:39:44 chuck Exp
+ */
+
+#ifndef _UVM_UVM_DEVICE_H_
+#define _UVM_UVM_DEVICE_H_
+
+/*
+ * uvm_device.h
+ *
+ * device handle into the VM system.
+ */
+
+/*
+ * the uvm_device structure. object is put at the top of the data structure.
+ * this allows:
+ * (struct uvm_device *) == (struct uvm_object *)
+ */
+
+struct uvm_device {
+ struct uvm_object u_obj; /* the actual VM object */
+ int u_flags; /* flags [LOCKED BY UDV_LOCK!] */
+ dev_t u_device; /* our device */
+ LIST_ENTRY(uvm_device) u_list; /* list of device objects */
+};
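+
+/*
+ * illustrative sketch (relies only on the layout noted above): since
+ * u_obj is the first member, a pointer to a uvm_device can be treated
+ * as a pointer to its embedded uvm_object and cast back again, e.g.
+ *
+ *     struct uvm_device *udv;
+ *     struct uvm_object *uobj = &udv->u_obj;
+ *     struct uvm_device *back = (struct uvm_device *) uobj;
+ *
+ * this is the cast udv_detach() and udv_fault() apply to the uvm_object
+ * they are handed.
+ */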
+
+/*
+ * u_flags values
+ */
+
+#define UVM_DEVICE_HOLD 0x1 /* someone has a "hold" on it */
+#define UVM_DEVICE_WANTED 0x2 /* someone wants to put a "hold" on */
+
+/*
+ * prototypes
+ */
+
+struct uvm_object *udv_attach __P((void *, vm_prot_t));
+
+#endif /* _UVM_UVM_DEVICE_H_ */
diff --git a/sys/uvm/uvm_extern.h b/sys/uvm/uvm_extern.h
new file mode 100644
index 00000000000..bcec521f665
--- /dev/null
+++ b/sys/uvm/uvm_extern.h
@@ -0,0 +1,386 @@
+/* $NetBSD: uvm_extern.h,v 1.21 1998/09/08 23:44:21 thorpej Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ *
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * from: Id: uvm_extern.h,v 1.1.2.21 1998/02/07 01:16:53 chs Exp
+ */
+
+#ifndef _UVM_UVM_EXTERN_H_
+#define _UVM_UVM_EXTERN_H_
+
+/*
+ * uvm_extern.h: this file defines the external interface to the VM system.
+ *
+ * this should be the only file included by non-VM parts of the kernel
+ * which need access to VM services. if you want to know the interface
+ * to the MI VM layer without knowing the details, this is the file to
+ * learn.
+ *
+ * NOTE: vm system calls are prototyped in syscallargs.h
+ */
+
+/*
+ * defines
+ */
+
+/*
+ * the following defines are for uvm_map and functions which call it.
+ */
+
+/* protections bits */
+#define UVM_PROT_MASK 0x07 /* protection mask */
+#define UVM_PROT_NONE 0x00 /* protection none */
+#define UVM_PROT_ALL 0x07 /* everything */
+#define UVM_PROT_READ 0x01 /* read */
+#define UVM_PROT_WRITE 0x02 /* write */
+#define UVM_PROT_EXEC 0x04 /* exec */
+
+/* protection short codes */
+#define UVM_PROT_R 0x01 /* read */
+#define UVM_PROT_W 0x02 /* write */
+#define UVM_PROT_RW 0x03 /* read-write */
+#define UVM_PROT_X 0x04 /* exec */
+#define UVM_PROT_RX 0x05 /* read-exec */
+#define UVM_PROT_WX 0x06 /* write-exec */
+#define UVM_PROT_RWX 0x07 /* read-write-exec */
+
+/* 0x08: not used */
+
+/* inherit codes */
+#define UVM_INH_MASK 0x30 /* inherit mask */
+#define UVM_INH_SHARE 0x00 /* "share" */
+#define UVM_INH_COPY 0x10 /* "copy" */
+#define UVM_INH_NONE 0x20 /* "none" */
+#define UVM_INH_DONATE 0x30 /* "donate" << not used */
+
+/* 0x40, 0x80: not used */
+
+/* bits 0x700: max protection, 0x800: not used */
+
+/* bits 0x7000: advice, 0x8000: not used */
+/* advice: matches MADV_* from sys/mman.h */
+#define UVM_ADV_NORMAL 0x0 /* 'normal' */
+#define UVM_ADV_RANDOM 0x1 /* 'random' */
+#define UVM_ADV_SEQUENTIAL 0x2 /* 'sequential' */
+/* 0x3: will need, 0x4: dontneed */
+#define UVM_ADV_MASK 0x7 /* mask */
+
+/* mapping flags */
+#define UVM_FLAG_FIXED 0x010000 /* map at fixed address (don't find space) */
+#define UVM_FLAG_OVERLAY 0x020000 /* establish overlay */
+#define UVM_FLAG_NOMERGE 0x040000 /* don't merge map entries */
+#define UVM_FLAG_COPYONW 0x080000 /* set copy_on_write flag */
+#define UVM_FLAG_AMAPPAD 0x100000 /* for bss: pad amap to reduce malloc() */
+#define UVM_FLAG_TRYLOCK 0x200000 /* fail if we can not lock map */
+
+/* macros to extract info */
+#define UVM_PROTECTION(X) ((X) & UVM_PROT_MASK)
+#define UVM_INHERIT(X) (((X) & UVM_INH_MASK) >> 4)
+#define UVM_MAXPROTECTION(X) (((X) >> 8) & UVM_PROT_MASK)
+#define UVM_ADVICE(X) (((X) >> 12) & UVM_ADV_MASK)
+
+#define UVM_MAPFLAG(PROT,MAXPROT,INH,ADVICE,FLAGS) \
+ ((MAXPROT << 8)|(PROT)|(INH)|((ADVICE) << 12)|(FLAGS))
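+
+/*
+ * example (hypothetical values): a read/write, copy-on-write mapping with
+ * read/write/exec maximum protection and normal advice could be encoded as
+ *
+ *     UVM_MAPFLAG(UVM_PROT_RW, UVM_PROT_RWX, UVM_INH_COPY,
+ *         UVM_ADV_NORMAL, UVM_FLAG_COPYONW)
+ *
+ * which packs to 0x080713: prot 0x003, inherit 0x010, maxprot 0x700,
+ * advice 0x0000, flags 0x080000.
+ */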
+
+/* magic offset value */
+#define UVM_UNKNOWN_OFFSET ((vaddr_t) -1)
+ /* offset not known(obj) or don't care(!obj) */
+
+/*
+ * the following defines are for uvm_km_kmemalloc's flags
+ */
+
+#define UVM_KMF_NOWAIT 0x1 /* matches M_NOWAIT */
+#define UVM_KMF_VALLOC 0x2 /* allocate VA only */
+#define UVM_KMF_TRYLOCK UVM_FLAG_TRYLOCK /* try locking only */
+
+/*
+ * the following defines the strategies for uvm_pagealloc_strat()
+ */
+#define UVM_PGA_STRAT_NORMAL 0 /* high -> low free list walk */
+#define UVM_PGA_STRAT_ONLY 1 /* only specified free list */
+#define UVM_PGA_STRAT_FALLBACK 2 /* ONLY falls back on NORMAL */
+
+/*
+ * structures
+ */
+
+struct core;
+struct mount;
+struct pglist;
+struct proc;
+struct ucred;
+struct uio;
+struct uvm_object;
+struct vm_anon;
+struct vmspace;
+struct pmap;
+struct vnode;
+
+/*
+ * uvmexp: global data structures that are exported to parts of the kernel
+ * other than the vm system.
+ */
+
+struct uvmexp {
+ /* vm_page constants */
+ int pagesize; /* size of a page (PAGE_SIZE): must be power of 2 */
+ int pagemask; /* page mask */
+ int pageshift; /* page shift */
+
+ /* vm_page counters */
+ int npages; /* number of pages we manage */
+ int free; /* number of free pages */
+ int active; /* number of active pages */
+ int inactive; /* number of pages that we free'd but may want back */
+ int paging; /* number of pages in the process of being paged out */
+ int wired; /* number of wired pages */
+ int reserve_pagedaemon; /* number of pages reserved for pagedaemon */
+ int reserve_kernel; /* number of pages reserved for kernel */
+
+ /* pageout params */
+ int freemin; /* min number of free pages */
+ int freetarg; /* target number of free pages */
+ int inactarg; /* target number of inactive pages */
+ int wiredmax; /* max number of wired pages */
+
+ /* swap */
+ int nswapdev; /* number of configured swap devices in system */
+ int swpages; /* number of PAGE_SIZE'ed swap pages */
+ int swpginuse; /* number of swap pages in use */
+ int nswget; /* number of times fault calls uvm_swap_get() */
+ int nanon; /* total number of anon's in system */
+ int nfreeanon; /* number of free anon's */
+
+ /* stat counters */
+ int faults; /* page fault count */
+ int traps; /* trap count */
+ int intrs; /* interrupt count */
+ int swtch; /* context switch count */
+ int softs; /* software interrupt count */
+ int syscalls; /* system calls */
+ int pageins; /* pagein operation count */
+ /* pageouts are in pdpageouts below */
+ int swapins; /* swapins */
+ int swapouts; /* swapouts */
+ int pgswapin; /* pages swapped in */
+ int pgswapout; /* pages swapped out */
+ int forks; /* forks */
+ int forks_ppwait; /* forks where parent waits */
+ int forks_sharevm; /* forks where vmspace is shared */
+
+ /* fault subcounters */
+ int fltnoram; /* number of times fault was out of ram */
+ int fltnoanon; /* number of times fault was out of anons */
+ int fltpgwait; /* number of times fault had to wait on a page */
+ int fltpgrele; /* number of times fault found a released page */
+ int fltrelck; /* number of times fault relock called */
+ int fltrelckok; /* number of times fault relock is a success */
+ int fltanget; /* number of times fault gets anon page */
+ int fltanretry; /* number of times fault retries an anon get */
+ int fltamcopy; /* number of times fault clears "needs copy" */
+ int fltnamap; /* number of times fault maps a neighbor anon page */
+ int fltnomap; /* number of times fault maps a neighbor obj page */
+ int fltlget; /* number of times fault does a locked pgo_get */
+ int fltget; /* number of times fault does an unlocked get */
+ int flt_anon; /* number of times fault anon (case 1a) */
+ int flt_acow; /* number of times fault anon cow (case 1b) */
+ int flt_obj; /* number of times fault is on object page (2a) */
+ int flt_prcopy; /* number of times fault promotes with copy (2b) */
+ int flt_przero; /* number of times fault promotes with zerofill (2b) */
+
+ /* daemon counters */
+ int pdwoke; /* number of times daemon woke up */
+ int pdrevs; /* number of times daemon rev'd clock hand */
+ int pdswout; /* number of times daemon called for swapout */
+ int pdfreed; /* number of pages daemon freed since boot */
+ int pdscans; /* number of pages daemon scanned since boot */
+ int pdanscan; /* number of anonymous pages scanned by daemon */
+ int pdobscan; /* number of object pages scanned by daemon */
+ int pdreact; /* number of pages daemon reactivated since boot */
+ int pdbusy; /* number of times daemon found a busy page */
+ int pdpageouts; /* number of times daemon started a pageout */
+ int pdpending; /* number of times daemon got a pending pageout */
+ int pddeact; /* number of pages daemon deactivates */
+
+ /* kernel memory objects: managed by uvm_km_kmemalloc() only! */
+ struct uvm_object *kmem_object;
+ struct uvm_object *mb_object;
+};
+
+
+extern struct uvmexp uvmexp;
+
+/*
+ * macros
+ */
+
+/* zalloc zeros memory, alloc does not */
+#define uvm_km_zalloc(MAP,SIZE) uvm_km_alloc1(MAP,SIZE,TRUE)
+#define uvm_km_alloc(MAP,SIZE) uvm_km_alloc1(MAP,SIZE,FALSE)
+
+/*
+ * typedefs
+ */
+
+typedef unsigned int uvm_flag_t;
+typedef int vm_fault_t;
+
+/* uvm_aobj.c */
+struct uvm_object *uao_create __P((vsize_t, int));
+void uao_detach __P((struct uvm_object *));
+void uao_reference __P((struct uvm_object *));
+
+/* uvm_fault.c */
+int uvm_fault __P((vm_map_t, vaddr_t,
+ vm_fault_t, vm_prot_t));
+ /* handle a page fault */
+
+/* uvm_glue.c */
+#if defined(KGDB)
+void uvm_chgkprot __P((caddr_t, size_t, int));
+#endif
+void uvm_fork __P((struct proc *, struct proc *, boolean_t));
+void uvm_exit __P((struct proc *));
+void uvm_init_limits __P((struct proc *));
+boolean_t uvm_kernacc __P((caddr_t, size_t, int));
+__dead void uvm_scheduler __P((void)) __attribute__((noreturn));
+void uvm_swapin __P((struct proc *));
+boolean_t uvm_useracc __P((caddr_t, size_t, int));
+void uvm_vslock __P((struct proc *, caddr_t, size_t));
+void uvm_vsunlock __P((struct proc *, caddr_t, size_t));
+
+
+/* uvm_init.c */
+void uvm_init __P((void));
+ /* init the uvm system */
+
+/* uvm_io.c */
+int uvm_io __P((vm_map_t, struct uio *));
+
+/* uvm_km.c */
+vaddr_t uvm_km_alloc1 __P((vm_map_t, vsize_t, boolean_t));
+void uvm_km_free __P((vm_map_t, vaddr_t, vsize_t));
+void uvm_km_free_wakeup __P((vm_map_t, vaddr_t,
+ vsize_t));
+vaddr_t uvm_km_kmemalloc __P((vm_map_t, struct uvm_object *,
+ vsize_t, int));
+struct vm_map *uvm_km_suballoc __P((vm_map_t, vaddr_t *,
+ vaddr_t *, vsize_t, boolean_t,
+ boolean_t, vm_map_t));
+vaddr_t uvm_km_valloc __P((vm_map_t, vsize_t));
+vaddr_t uvm_km_valloc_wait __P((vm_map_t, vsize_t));
+vaddr_t uvm_km_alloc_poolpage1 __P((vm_map_t,
+ struct uvm_object *, boolean_t));
+void uvm_km_free_poolpage1 __P((vm_map_t, vaddr_t));
+
+#define uvm_km_alloc_poolpage(waitok) uvm_km_alloc_poolpage1(kmem_map, \
+ uvmexp.kmem_object, (waitok))
+#define uvm_km_free_poolpage(addr) uvm_km_free_poolpage1(kmem_map, (addr))
+
+/* uvm_map.c */
+int uvm_map __P((vm_map_t, vaddr_t *, vsize_t,
+ struct uvm_object *, vaddr_t, uvm_flag_t));
+int uvm_map_pageable __P((vm_map_t, vaddr_t,
+ vaddr_t, boolean_t));
+boolean_t uvm_map_checkprot __P((vm_map_t, vaddr_t,
+ vaddr_t, vm_prot_t));
+int uvm_map_protect __P((vm_map_t, vaddr_t,
+ vaddr_t, vm_prot_t, boolean_t));
+struct vmspace *uvmspace_alloc __P((vaddr_t, vaddr_t,
+ boolean_t));
+void uvmspace_init __P((struct vmspace *, struct pmap *,
+ vaddr_t, vaddr_t, boolean_t));
+void uvmspace_exec __P((struct proc *));
+struct vmspace *uvmspace_fork __P((struct vmspace *));
+void uvmspace_free __P((struct vmspace *));
+void uvmspace_share __P((struct proc *, struct proc *));
+void uvmspace_unshare __P((struct proc *));
+
+
+/* uvm_meter.c */
+void uvm_meter __P((void));
+int uvm_sysctl __P((int *, u_int, void *, size_t *,
+ void *, size_t, struct proc *));
+void uvm_total __P((struct vmtotal *));
+
+/* uvm_mmap.c */
+int uvm_mmap __P((vm_map_t, vaddr_t *, vsize_t,
+ vm_prot_t, vm_prot_t, int,
+ caddr_t, vaddr_t));
+
+/* uvm_page.c */
+struct vm_page *uvm_pagealloc_strat __P((struct uvm_object *,
+ vaddr_t, struct vm_anon *, int, int));
+#define uvm_pagealloc(obj, off, anon) \
+ uvm_pagealloc_strat((obj), (off), (anon), UVM_PGA_STRAT_NORMAL, 0)
+void uvm_pagerealloc __P((struct vm_page *,
+ struct uvm_object *, vaddr_t));
+/* Actually, uvm_page_physload takes PF#s which need their own type */
+void uvm_page_physload __P((vaddr_t, vaddr_t,
+ vaddr_t, vaddr_t, int));
+void uvm_setpagesize __P((void));
+
+/* uvm_pdaemon.c */
+void uvm_pageout __P((void));
+
+/* uvm_pglist.c */
+int uvm_pglistalloc __P((psize_t, paddr_t,
+ paddr_t, paddr_t, paddr_t,
+ struct pglist *, int, int));
+void uvm_pglistfree __P((struct pglist *));
+
+/* uvm_swap.c */
+void uvm_swap_init __P((void));
+
+/* uvm_unix.c */
+int uvm_coredump __P((struct proc *, struct vnode *,
+ struct ucred *, struct core *));
+int uvm_grow __P((struct proc *, vaddr_t));
+
+/* uvm_user.c */
+int uvm_deallocate __P((vm_map_t, vaddr_t, vsize_t));
+
+/* uvm_vnode.c */
+void uvm_vnp_setsize __P((struct vnode *, u_quad_t));
+void uvm_vnp_sync __P((struct mount *));
+void uvm_vnp_terminate __P((struct vnode *));
+ /* terminate a uvm/uvn object */
+boolean_t uvm_vnp_uncache __P((struct vnode *));
+struct uvm_object *uvn_attach __P((void *, vm_prot_t));
+
+#endif /* _UVM_UVM_EXTERN_H_ */
+
diff --git a/sys/uvm/uvm_fault.c b/sys/uvm/uvm_fault.c
new file mode 100644
index 00000000000..10978e8c14e
--- /dev/null
+++ b/sys/uvm/uvm_fault.c
@@ -0,0 +1,1747 @@
+/* $NetBSD: uvm_fault.c,v 1.19 1999/01/24 23:53:15 chuck Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ *
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * from: Id: uvm_fault.c,v 1.1.2.23 1998/02/06 05:29:05 chs Exp
+ */
+
+/*
+ * uvm_fault.c: fault handler
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/mman.h>
+#include <sys/user.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_kern.h>
+
+#include <uvm/uvm.h>
+
+/*
+ *
+ * a word on page faults:
+ *
+ * types of page faults we handle:
+ *
+ * CASE 1: upper layer faults CASE 2: lower layer faults
+ *
+ * CASE 1A CASE 1B CASE 2A CASE 2B
+ * read/write1 write>1 read/write +-cow_write/zero
+ * | | | |
+ * +--|--+ +--|--+ +-----+ + | + | +-----+
+ * amap | V | | ----------->new| | | | ^ |
+ * +-----+ +-----+ +-----+ + | + | +--|--+
+ * | | |
+ * +-----+ +-----+ +--|--+ | +--|--+
+ * uobj | d/c | | d/c | | V | +----| |
+ * +-----+ +-----+ +-----+ +-----+
+ *
+ * d/c = don't care
+ *
+ * case [0]: layerless fault
+ * no amap or uobj is present. this is an error.
+ *
+ * case [1]: upper layer fault [anon active]
+ * 1A: [read] or [write with anon->an_ref == 1]
+ * I/O takes place in top level anon and uobj is not touched.
+ * 1B: [write with anon->an_ref > 1]
+ * new anon is alloc'd and data is copied off ["COW"]
+ *
+ * case [2]: lower layer fault [uobj]
+ * 2A: [read on non-NULL uobj] or [write to non-copy_on_write area]
+ * I/O takes place directly in object.
+ * 2B: [write to copy_on_write] or [read on NULL uobj]
+ * data is "promoted" from uobj to a new anon.
+ * if uobj is null, then we zero fill.
+ *
+ * we follow the standard UVM locking protocol ordering:
+ *
+ * MAPS => AMAP => UOBJ => ANON => PAGE QUEUES (PQ)
+ * we hold a PG_BUSY page if we unlock for I/O
+ *
+ *
+ * the code is structured as follows:
+ *
+ * - init the "IN" params in the ufi structure
+ * ReFault:
+ * - do lookups [locks maps], check protection, handle needs_copy
+ * - check for case 0 fault (error)
+ * - establish "range" of fault
+ * - if we have an amap lock it and extract the anons
+ * - if sequential advice deactivate pages behind us
+ * - at the same time check pmap for unmapped areas and anon for pages
+ * that we could map in (and do map it if found)
+ * - check object for resident pages that we could map in
+ * - if (case 2) goto Case2
+ * - >>> handle case 1
+ * - ensure source anon is resident in RAM
+ * - if case 1B alloc new anon and copy from source
+ * - map the correct page in
+ * Case2:
+ * - >>> handle case 2
+ * - ensure source page is resident (if uobj)
+ * - if case 2B alloc new anon and copy from source (could be zero
+ * fill if uobj == NULL)
+ * - map the correct page in
+ * - done!
+ *
+ * note on paging:
+ * if we have to do I/O we place a PG_BUSY page in the correct object,
+ * unlock everything, and do the I/O. when I/O is done we must reverify
+ * the state of the world before assuming that our data structures are
+ * valid. [because mappings could change while the map is unlocked]
+ *
+ * alternative 1: unbusy the page in question and restart the page fault
+ * from the top (ReFault). this is easy but does not take advantage
+ * of the information that we already have from our previous lookup,
+ * although it is possible that the "hints" in the vm_map will help here.
+ *
+ * alternative 2: the system already keeps track of a "version" number of
+ * a map. [i.e. every time you write-lock a map (e.g. to change a
+ * mapping) you bump the version number up by one...] so, we can save
+ * the version number of the map before we release the lock and start I/O.
+ * then when I/O is done we can relock and check the version numbers
+ * to see if anything changed. this might save us something over
+ * alternative 1 because we don't have to unbusy the page and there may
+ * be fewer compares(?).
+ *
+ * alternative 3: put in backpointers or a way to "hold" part of a map
+ * in place while I/O is in progress. this could be complex to
+ * implement (especially with structures like amap that can be referenced
+ * by multiple map entries, and figuring out what should wait could be
+ * complex as well...).
+ *
+ * given that we are not currently multiprocessor or multithreaded we might
+ * as well choose alternative 2 now. maybe alternative 3 would be useful
+ * in the future. XXX keep in mind for future consideration//rechecking.
+ */
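+
+/*
+ * compact restatement of the case selection above (an illustrative
+ * summary only, not compiled code):
+ *
+ *     if (amap == NULL && uobj == NULL)
+ *         case 0: error
+ *     else if (an anon covers the faulting address)
+ *         case 1: 1B if (write fault && anon->an_ref > 1), else 1A
+ *     else
+ *         case 2: 2B if (uobj == NULL || (write fault && copy-on-write)),
+ *                 else 2A
+ */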
+
+/*
+ * local data structures
+ */
+
+struct uvm_advice {
+ int advice;
+ int nback;
+ int nforw;
+};
+
+/*
+ * page range array:
+ * note: index in array must match "advice" value
+ * XXX: borrowed numbers from freebsd. do they work well for us?
+ */
+
+static struct uvm_advice uvmadvice[] = {
+ { MADV_NORMAL, 3, 4 },
+ { MADV_RANDOM, 0, 0 },
+ { MADV_SEQUENTIAL, 8, 7},
+};
+
+#define UVM_MAXRANGE 16 /* must be max() of nback+nforw+1 */
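+
+/*
+ * for example, with MADV_SEQUENTIAL advice the fault code looks at up to
+ * 8 pages behind and 7 pages ahead of the faulting page, so the range is
+ * nback + nforw + 1 = 8 + 7 + 1 = 16 pages, which is where UVM_MAXRANGE
+ * comes from.
+ */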
+
+/*
+ * private prototypes
+ */
+
+static void uvmfault_amapcopy __P((struct uvm_faultinfo *));
+static __inline void uvmfault_anonflush __P((struct vm_anon **, int));
+
+/*
+ * inline functions
+ */
+
+/*
+ * uvmfault_anonflush: try and deactivate pages in specified anons
+ *
+ * => does not have to deactivate page if it is busy
+ */
+
+static __inline void
+uvmfault_anonflush(anons, n)
+ struct vm_anon **anons;
+ int n;
+{
+ int lcv;
+ struct vm_page *pg;
+
+ for (lcv = 0 ; lcv < n ; lcv++) {
+ if (anons[lcv] == NULL)
+ continue;
+ simple_lock(&anons[lcv]->an_lock);
+ pg = anons[lcv]->u.an_page;
+ if (pg && (pg->flags & PG_BUSY) == 0 && pg->loan_count == 0) {
+ uvm_lock_pageq();
+ if (pg->wire_count == 0) {
+ pmap_page_protect(PMAP_PGARG(pg), VM_PROT_NONE);
+ uvm_pagedeactivate(pg);
+ }
+ uvm_unlock_pageq();
+ }
+ simple_unlock(&anons[lcv]->an_lock);
+ }
+}
+
+/*
+ * normal functions
+ */
+
+/*
+ * uvmfault_amapcopy: clear "needs_copy" in a map.
+ *
+ * => called with VM data structures unlocked (usually, see below)
+ * => we get a write lock on the maps and clear needs_copy for a VA
+ * => if we are out of RAM we sleep (waiting for more)
+ */
+
+static void
+uvmfault_amapcopy(ufi)
+ struct uvm_faultinfo *ufi;
+{
+
+ /*
+ * while we haven't done the job
+ */
+
+ while (1) {
+
+ /*
+ * no mapping? give up.
+ */
+
+ if (uvmfault_lookup(ufi, TRUE) == FALSE)
+ return;
+
+ /*
+ * copy if needed.
+ */
+
+ if (UVM_ET_ISNEEDSCOPY(ufi->entry))
+ amap_copy(ufi->map, ufi->entry, M_NOWAIT, TRUE,
+ ufi->orig_rvaddr, ufi->orig_rvaddr + 1);
+
+ /*
+ * didn't work? must be out of RAM. unlock and sleep.
+ */
+
+ if (UVM_ET_ISNEEDSCOPY(ufi->entry)) {
+ uvmfault_unlockmaps(ufi, TRUE);
+ uvm_wait("fltamapcopy");
+ continue;
+ }
+
+ /*
+ * got it! unlock and return.
+ */
+
+ uvmfault_unlockmaps(ufi, TRUE);
+ return;
+ }
+ /*NOTREACHED*/
+}
+
+/*
+ * uvmfault_anonget: get data in an anon into a non-busy, non-released
+ * page in that anon.
+ *
+ * => maps, amap, and anon locked by caller.
+ * => if we fail (result != VM_PAGER_OK) we unlock everything.
+ * => if we are successful, we return with everything still locked.
+ * => we don't move the page on the queues [gets moved later]
+ * => if we allocate a new page [we_own], it gets put on the queues.
+ * either way, the result is that the page is on the queues at return time
+ * => for pages which are on loan from a uvm_object (and thus are not
+ * owned by the anon): if successful, we return with the owning object
+ * locked. the caller must unlock this object when it unlocks everything
+ * else.
+ */
+
+int
+uvmfault_anonget(ufi, amap, anon)
+ struct uvm_faultinfo *ufi;
+ struct vm_amap *amap;
+ struct vm_anon *anon;
+{
+ boolean_t we_own; /* we own anon's page? */
+ boolean_t locked; /* did we relock? */
+ struct vm_page *pg;
+ int result;
+ UVMHIST_FUNC("uvmfault_anonget"); UVMHIST_CALLED(maphist);
+
+ result = 0; /* XXX shut up gcc */
+ uvmexp.fltanget++;
+ /* bump rusage counters */
+ if (anon->u.an_page)
+ curproc->p_addr->u_stats.p_ru.ru_minflt++;
+ else
+ curproc->p_addr->u_stats.p_ru.ru_majflt++;
+
+ /*
+ * loop until we get it, or fail.
+ */
+
+ while (1) {
+
+ we_own = FALSE; /* TRUE if we set PG_BUSY on a page */
+ pg = anon->u.an_page;
+
+ /*
+ * if there is a resident page and it is loaned, then anon
+ * may not own it. call out to uvm_anon_lockloanpg() to ensure
+ * the real owner of the page has been identified and locked.
+ */
+
+ if (pg && pg->loan_count)
+ pg = uvm_anon_lockloanpg(anon);
+
+ /*
+ * page there? make sure it is not busy/released.
+ */
+
+ if (pg) {
+
+ /*
+ * at this point, if the page has a uobject [meaning
+ * we have it on loan], then that uobject is locked
+ * by us! if the page is busy, we drop all the
+ * locks (including uobject) and try again.
+ */
+
+ if ((pg->flags & (PG_BUSY|PG_RELEASED)) == 0) {
+ UVMHIST_LOG(maphist, "<- OK",0,0,0,0);
+ return (VM_PAGER_OK);
+ }
+ pg->flags |= PG_WANTED;
+ uvmexp.fltpgwait++;
+
+ /*
+ * the last unlock must be an atomic unlock+wait on
+ * the owner of page
+ */
+ if (pg->uobject) { /* owner is uobject ? */
+ uvmfault_unlockall(ufi, amap, NULL, anon);
+ UVMHIST_LOG(maphist, " unlock+wait on uobj",0,
+ 0,0,0);
+ UVM_UNLOCK_AND_WAIT(pg,
+ &pg->uobject->vmobjlock,
+ FALSE, "anonget1",0);
+ } else {
+ /* anon owns page */
+ uvmfault_unlockall(ufi, amap, NULL, NULL);
+ UVMHIST_LOG(maphist, " unlock+wait on anon",0,
+ 0,0,0);
+ UVM_UNLOCK_AND_WAIT(pg,&anon->an_lock,0,
+ "anonget2",0);
+ }
+ /* ready to relock and try again */
+
+ } else {
+
+ /*
+ * no page, we must try and bring it in.
+ */
+ pg = uvm_pagealloc(NULL, 0, anon);
+
+ if (pg == NULL) { /* out of RAM. */
+
+ uvmfault_unlockall(ufi, amap, NULL, anon);
+ uvmexp.fltnoram++;
+ UVMHIST_LOG(maphist, " noram -- UVM_WAIT",0,
+ 0,0,0);
+ uvm_wait("flt_noram1");
+ /* ready to relock and try again */
+
+ } else {
+
+ /* we set the PG_BUSY bit */
+ we_own = TRUE;
+ uvmfault_unlockall(ufi, amap, NULL, anon);
+
+ /*
+ * we are passing a PG_BUSY+PG_FAKE+PG_CLEAN
+ * page into the uvm_swap_get function with
+ * all data structures unlocked. note that
+ * it is ok to read an_swslot here because
+ * we hold PG_BUSY on the page.
+ */
+ uvmexp.pageins++;
+ result = uvm_swap_get(pg, anon->an_swslot,
+ PGO_SYNCIO);
+
+ /*
+ * we clean up after the i/o below in the
+ * "we_own" case
+ */
+ /* ready to relock and try again */
+ }
+ }
+
+ /*
+ * now relock and try again
+ */
+
+ locked = uvmfault_relock(ufi);
+ if (locked) {
+ amap_lock(amap);
+ }
+ if (locked || we_own)
+ simple_lock(&anon->an_lock);
+
+ /*
+ * if we own the page (i.e. we set PG_BUSY), then we need
+ * to clean up after the I/O. there are three cases to
+ * consider:
+ * [1] page released during I/O: free anon and ReFault.
+ * [2] I/O not OK. free the page and cause the fault
+ * to fail.
+ * [3] I/O OK! activate the page and sync with the
+ * non-we_own case (i.e. drop anon lock if not locked).
+ */
+
+ if (we_own) {
+
+ if (pg->flags & PG_WANTED) {
+ /* still holding object lock */
+ thread_wakeup(pg);
+ }
+ /* un-busy! */
+ pg->flags &= ~(PG_WANTED|PG_BUSY|PG_FAKE);
+ UVM_PAGE_OWN(pg, NULL);
+
+ /*
+ * if we were RELEASED during I/O, then our anon is
+ * no longer part of an amap. we need to free the
+ * anon and try again.
+ */
+ if (pg->flags & PG_RELEASED) {
+ pmap_page_protect(PMAP_PGARG(pg),
+ VM_PROT_NONE); /* to be safe */
+ simple_unlock(&anon->an_lock);
+ uvm_anfree(anon); /* frees page for us */
+ if (locked)
+ uvmfault_unlockall(ufi, amap, NULL, NULL);
+ uvmexp.fltpgrele++;
+ UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0);
+ return (VM_PAGER_REFAULT); /* refault! */
+ }
+
+ if (result != VM_PAGER_OK) {
+#ifdef DIAGNOSTIC
+ if (result == VM_PAGER_PEND)
+ panic("uvmfault_anonget: got PENDING for non-async I/O");
+#endif
+ /* remove page from anon */
+ anon->u.an_page = NULL;
+
+ /*
+ * note: page was never !PG_BUSY, so it
+ * can't be mapped and thus no need to
+ * pmap_page_protect it...
+ */
+ uvm_lock_pageq();
+ uvm_pagefree(pg);
+ uvm_unlock_pageq();
+
+ if (locked)
+ uvmfault_unlockall(ufi, amap, NULL,
+ anon);
+ else
+ simple_unlock(&anon->an_lock);
+ UVMHIST_LOG(maphist, "<- ERROR", 0,0,0,0);
+ return (VM_PAGER_ERROR);
+ }
+
+ /*
+ * must be OK, clear modify (already PG_CLEAN)
+ * and activate
+ */
+ pmap_clear_modify(PMAP_PGARG(pg));
+ uvm_lock_pageq();
+ uvm_pageactivate(pg);
+ uvm_unlock_pageq();
+ if (!locked)
+ simple_unlock(&anon->an_lock);
+ }
+
+ /*
+ * we were not able to relock. restart fault.
+ */
+
+ if (!locked) {
+ UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0);
+ return (VM_PAGER_REFAULT);
+ }
+
+ /*
+ * verify no one has touched the amap and moved the anon on us.
+ */
+
+ if (amap_lookup(&ufi->entry->aref,
+ ufi->orig_rvaddr - ufi->entry->start) != anon) {
+
+ uvmfault_unlockall(ufi, amap, NULL, anon);
+ UVMHIST_LOG(maphist, "<- REFAULT", 0,0,0,0);
+ return (VM_PAGER_REFAULT);
+ }
+
+ /*
+ * try it again!
+ */
+
+ uvmexp.fltanretry++;
+ continue;
+
+ } /* while (1) */
+
+ /*NOTREACHED*/
+}
+
+/*
+ * F A U L T - m a i n e n t r y p o i n t
+ */
+
+/*
+ * uvm_fault: page fault handler
+ *
+ * => called from MD code to resolve a page fault
+ * => VM data structures usually should be unlocked. however, it is
+ * possible to call here with the main map locked if the caller
+ * gets a write lock, sets it recursive, and then calls us (cf.
+ * uvm_map_pageable). this should be avoided because it keeps
+ * the map locked off during I/O.
+ */
+
+int
+uvm_fault(orig_map, vaddr, fault_type, access_type)
+ vm_map_t orig_map;
+ vaddr_t vaddr;
+ vm_fault_t fault_type;
+ vm_prot_t access_type;
+{
+ struct uvm_faultinfo ufi;
+ vm_prot_t enter_prot;
+ boolean_t wired, narrow, promote, locked, shadowed;
+ int npages, nback, nforw, centeridx, result, lcv, gotpages;
+ vaddr_t startva, objaddr, currva, offset;
+ paddr_t pa;
+ struct vm_amap *amap;
+ struct uvm_object *uobj;
+ struct vm_anon *anons_store[UVM_MAXRANGE], **anons, *anon, *oanon;
+ struct vm_page *pages[UVM_MAXRANGE], *pg, *uobjpage;
+ UVMHIST_FUNC("uvm_fault"); UVMHIST_CALLED(maphist);
+
+ UVMHIST_LOG(maphist, "(map=0x%x, vaddr=0x%x, ft=%d, at=%d)",
+ orig_map, vaddr, fault_type, access_type);
+
+ anon = NULL; /* XXX: shut up gcc */
+
+ uvmexp.faults++; /* XXX: locking? */
+
+ /*
+ * init the IN parameters in the ufi
+ */
+
+ ufi.orig_map = orig_map;
+ ufi.orig_rvaddr = trunc_page(vaddr);
+ ufi.orig_size = PAGE_SIZE; /* can't get any smaller than this */
+ if (fault_type == VM_FAULT_WIRE)
+ narrow = TRUE; /* don't look for neighborhood
+ * pages on wire */
+ else
+ narrow = FALSE; /* normal fault */
+
+ /*
+ * "goto ReFault" means restart the page fault from ground zero.
+ */
+ReFault:
+
+ /*
+ * lookup and lock the maps
+ */
+
+ if (uvmfault_lookup(&ufi, FALSE) == FALSE) {
+ UVMHIST_LOG(maphist, "<- no mapping @ 0x%x", vaddr, 0,0,0);
+ return (KERN_INVALID_ADDRESS);
+ }
+ /* locked: maps(read) */
+
+ /*
+ * check protection
+ */
+
+ if ((ufi.entry->protection & access_type) != access_type) {
+ UVMHIST_LOG(maphist,
+ "<- protection failure (prot=0x%x, access=0x%x)",
+ ufi.entry->protection, access_type, 0, 0);
+ uvmfault_unlockmaps(&ufi, FALSE);
+ return (KERN_PROTECTION_FAILURE);
+ }
+
+ /*
+ * "enter_prot" is the protection we want to enter the page in at.
+ * for certain pages (e.g. copy-on-write pages) this protection can
+ * be more strict than ufi.entry->protection. "wired" means either
+ * the entry is wired or we are fault-wiring the pg.
+ */
+
+ enter_prot = ufi.entry->protection;
+ wired = (ufi.entry->wired_count != 0) || (fault_type == VM_FAULT_WIRE);
+ if (wired)
+ access_type = enter_prot; /* full access for wired */
+
+ /*
+ * handle "needs_copy" case. if we need to copy the amap we will
+ * have to drop our readlock and relock it with a write lock. (we
+ * need a write lock to change anything in a map entry [e.g.
+ * needs_copy]).
+ */
+
+ if (UVM_ET_ISNEEDSCOPY(ufi.entry)) {
+ if ((access_type & VM_PROT_WRITE) ||
+ (ufi.entry->object.uvm_obj == NULL)) {
+ /* need to clear */
+ UVMHIST_LOG(maphist,
+ " need to clear needs_copy and refault",0,0,0,0);
+ uvmfault_unlockmaps(&ufi, FALSE);
+ uvmfault_amapcopy(&ufi);
+ uvmexp.fltamcopy++;
+ goto ReFault;
+
+ } else {
+
+ /*
+ * ensure that we pmap_enter page R/O since
+ * needs_copy is still true
+ */
+ enter_prot = enter_prot & ~VM_PROT_WRITE;
+
+ }
+ }
+
+ /*
+ * identify the players
+ */
+
+ amap = ufi.entry->aref.ar_amap; /* top layer */
+ uobj = ufi.entry->object.uvm_obj; /* bottom layer */
+
+ /*
+ * check for a case 0 fault. if nothing backing the entry then
+ * error now.
+ */
+
+ if (amap == NULL && uobj == NULL) {
+ uvmfault_unlockmaps(&ufi, FALSE);
+ UVMHIST_LOG(maphist,"<- no backing store, no overlay",0,0,0,0);
+ return (KERN_INVALID_ADDRESS);
+ }
+
+ /*
+ * establish range of interest based on advice from mapper
+ * and then clip to fit map entry. note that we only want
+ * to do this the first time through the fault. if we
+ * ReFault we will disable this by setting "narrow" to true.
+ */
+
+ if (narrow == FALSE) {
+
+ /* wide fault (!narrow) */
+#ifdef DIAGNOSTIC
+ if (uvmadvice[ufi.entry->advice].advice != ufi.entry->advice)
+ panic("fault: advice mismatch!");
+#endif
+ nback = min(uvmadvice[ufi.entry->advice].nback,
+ (ufi.orig_rvaddr - ufi.entry->start) >> PAGE_SHIFT);
+ startva = ufi.orig_rvaddr - (nback << PAGE_SHIFT);
+ nforw = min(uvmadvice[ufi.entry->advice].nforw,
+ ((ufi.entry->end - ufi.orig_rvaddr) >>
+ PAGE_SHIFT) - 1);
+ /*
+ * note: "-1" because we don't want to count the
+ * faulting page as forw
+ */
+ npages = nback + nforw + 1;
+ centeridx = nback;
+
+ narrow = FALSE; /* ensure only once per-fault */
+
+ } else {
+
+ /* narrow fault! */
+ nback = nforw = 0;
+ startva = ufi.orig_rvaddr;
+ npages = 1;
+ centeridx = 0;
+
+ }
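+
+ /*
+ * worked example (hypothetical numbers): with MADV_NORMAL advice
+ * (nback <= 3, nforw <= 4), an entry covering 10 pages and a fault
+ * on the entry's second page give
+ *     nback = min(3, 1) = 1, startva = ufi.entry->start,
+ *     nforw = min(4, 9 - 1) = 4,
+ *     npages = 1 + 4 + 1 = 6, centeridx = 1.
+ */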
+
+ /* locked: maps(read) */
+ UVMHIST_LOG(maphist, " narrow=%d, back=%d, forw=%d, startva=0x%x",
+ narrow, nback, nforw, startva);
+ UVMHIST_LOG(maphist, " entry=0x%x, amap=0x%x, obj=0x%x", ufi.entry,
+ amap, uobj, 0);
+
+ /*
+ * if we've got an amap, lock it and extract current anons.
+ */
+
+ if (amap) {
+ amap_lock(amap);
+ anons = anons_store;
+ amap_lookups(&ufi.entry->aref, startva - ufi.entry->start,
+ anons, npages);
+ } else {
+ anons = NULL; /* to be safe */
+ }
+
+ /* locked: maps(read), amap(if there) */
+
+ /*
+ * for MADV_SEQUENTIAL mappings we want to deactivate the back pages
+ * now and then forget about them (for the rest of the fault).
+ */
+
+ if (ufi.entry->advice == MADV_SEQUENTIAL) {
+
+ UVMHIST_LOG(maphist, " MADV_SEQUENTIAL: flushing backpages",
+ 0,0,0,0);
+ /* flush back-page anons? */
+ if (amap)
+ uvmfault_anonflush(anons, nback);
+
+ /* flush object? */
+ if (uobj) {
+ objaddr =
+ (startva - ufi.entry->start) + ufi.entry->offset;
+ simple_lock(&uobj->vmobjlock);
+ (void) uobj->pgops->pgo_flush(uobj, objaddr, objaddr +
+ (nback << PAGE_SHIFT), PGO_DEACTIVATE);
+ simple_unlock(&uobj->vmobjlock);
+ }
+
+ /* now forget about the backpages */
+ if (amap)
+ anons += nback;
+ startva = startva + (nback << PAGE_SHIFT);
+ npages -= nback;
+ nback = centeridx = 0;
+ }
+
+ /* locked: maps(read), amap(if there) */
+
+ /*
+ * map in the backpages and frontpages we found in the amap in hopes
+ * of preventing future faults. we also init the pages[] array as
+ * we go.
+ */
+
+ currva = startva;
+ shadowed = FALSE;
+ for (lcv = 0 ; lcv < npages ; lcv++, currva += PAGE_SIZE) {
+
+ /*
+ * don't play with VAs that are already mapped
+ * (except for the center page)
+ * XXX: return value of pmap_extract disallows PA 0
+ */
+ if (lcv != centeridx) {
+ pa = pmap_extract(ufi.orig_map->pmap, currva);
+ if (pa != NULL) {
+ pages[lcv] = PGO_DONTCARE;
+ continue;
+ }
+ }
+
+ /*
+ * unmapped or center page. check if any anon at this level.
+ */
+ if (amap == NULL || anons[lcv] == NULL) {
+ pages[lcv] = NULL;
+ continue;
+ }
+
+ /*
+ * check for present page and map if possible. re-activate it.
+ */
+
+ pages[lcv] = PGO_DONTCARE;
+ if (lcv == centeridx) { /* save center for later! */
+ shadowed = TRUE;
+ continue;
+ }
+ anon = anons[lcv];
+ simple_lock(&anon->an_lock);
+ /* ignore loaned pages */
+ if (anon->u.an_page && anon->u.an_page->loan_count == 0 &&
+ (anon->u.an_page->flags & (PG_RELEASED|PG_BUSY)) == 0) {
+ uvm_lock_pageq();
+ uvm_pageactivate(anon->u.an_page); /* reactivate */
+ uvm_unlock_pageq();
+ UVMHIST_LOG(maphist,
+ " MAPPING: n anon: pm=0x%x, va=0x%x, pg=0x%x",
+ ufi.orig_map->pmap, currva, anon->u.an_page, 0);
+ uvmexp.fltnamap++;
+ pmap_enter(ufi.orig_map->pmap, currva,
+ VM_PAGE_TO_PHYS(anon->u.an_page),
+ (anon->an_ref > 1) ? VM_PROT_READ : enter_prot,
+ (ufi.entry->wired_count != 0));
+ }
+ simple_unlock(&anon->an_lock);
+ }
+
+ /* locked: maps(read), amap(if there) */
+ /* (shadowed == TRUE) if there is an anon at the faulting address */
+ UVMHIST_LOG(maphist, " shadowed=%d, will_get=%d", shadowed,
+ (uobj && shadowed == FALSE),0,0);
+
+ /*
+ * note that if we are really short of RAM we could sleep in the above
+ * call to pmap_enter with everything locked. bad?
+ * XXXCDC: this is fixed in PMAP_NEW (no sleep alloc's in pmap)
+ */
+
+ /*
+ * if the desired page is not shadowed by the amap and we have a
+ * backing object, then we check to see if the backing object would
+ * prefer to handle the fault itself (rather than letting us do it
+ * with the usual pgo_get hook). the backing object signals this by
+ * providing a pgo_fault routine.
+ */
+
+ if (uobj && shadowed == FALSE && uobj->pgops->pgo_fault != NULL) {
+
+ simple_lock(&uobj->vmobjlock);
+
+ /* locked: maps(read), amap (if there), uobj */
+ result = uobj->pgops->pgo_fault(&ufi, startva, pages, npages,
+ centeridx, fault_type, access_type,
+ PGO_LOCKED);
+ /* locked: nothing, pgo_fault has unlocked everything */
+
+ if (result == VM_PAGER_OK)
+ return (KERN_SUCCESS); /* pgo_fault did pmap enter */
+ else if (result == VM_PAGER_REFAULT)
+ goto ReFault; /* try again! */
+ else
+ return (KERN_PROTECTION_FAILURE);
+ }
+
+ /*
+ * now, if the desired page is not shadowed by the amap and we have
+ * a backing object that does not have a special fault routine, then
+ * we ask (with pgo_get) the object for resident pages that we care
+ * about and attempt to map them in. we do not let pgo_get block
+ * (PGO_LOCKED).
+ *
+ * ("get" has the option of doing a pmap_enter for us)
+ */
+
+ if (uobj && shadowed == FALSE) {
+ simple_lock(&uobj->vmobjlock);
+
+ /* locked (!shadowed): maps(read), amap (if there), uobj */
+ /*
+ * the following call to pgo_get does _not_ change locking state
+ */
+
+ uvmexp.fltlget++;
+ gotpages = npages;
+ result = uobj->pgops->pgo_get(uobj, ufi.entry->offset +
+ (startva - ufi.entry->start),
+ pages, &gotpages, centeridx,
+ UVM_ET_ISCOPYONWRITE(ufi.entry) ?
+ VM_PROT_READ : access_type,
+ ufi.entry->advice, PGO_LOCKED);
+
+ /*
+ * check for pages to map, if we got any
+ */
+
+ uobjpage = NULL;
+
+ if (gotpages) {
+ currva = startva;
+ for (lcv = 0 ; lcv < npages ;
+ lcv++, currva += PAGE_SIZE) {
+
+ if (pages[lcv] == NULL ||
+ pages[lcv] == PGO_DONTCARE)
+ continue;
+
+#ifdef DIAGNOSTIC
+ /*
+ * pager sanity check: pgo_get with
+ * PGO_LOCKED should never return a
+ * released page to us.
+ */
+ if (pages[lcv]->flags & PG_RELEASED)
+ panic("uvm_fault: pgo_get PGO_LOCKED gave us a RELEASED page");
+#endif
+
+ /*
+ * if center page is resident and not
+ * PG_BUSY|PG_RELEASED then pgo_get
+ * made it PG_BUSY for us and gave
+ * us a handle to it. remember this
+ * page as "uobjpage." (for later use).
+ */
+
+ if (lcv == centeridx) {
+ uobjpage = pages[lcv];
+ UVMHIST_LOG(maphist, " got uobjpage (0x%x) with locked get",
+ uobjpage, 0,0,0);
+ continue;
+ }
+
+ /*
+ * note: calling pgo_get with locked data
+ * structures returns us pages which are
+ * neither busy nor released, so we don't
+ * need to check for this. we can just
+ * directly enter the page (after moving it
+ * to the head of the active queue [useful?]).
+ */
+
+ uvm_lock_pageq();
+ uvm_pageactivate(pages[lcv]); /* reactivate */
+ uvm_unlock_pageq();
+ UVMHIST_LOG(maphist,
+ " MAPPING: n obj: pm=0x%x, va=0x%x, pg=0x%x",
+ ufi.orig_map->pmap, currva, pages[lcv], 0);
+ uvmexp.fltnomap++;
+ pmap_enter(ufi.orig_map->pmap, currva,
+ VM_PAGE_TO_PHYS(pages[lcv]),
+ UVM_ET_ISCOPYONWRITE(ufi.entry) ?
+ VM_PROT_READ : enter_prot, wired);
+
+ /*
+ * NOTE: page can't be PG_WANTED or PG_RELEASED
+ * because we've held the lock the whole time
+ * we've had the handle.
+ */
+ pages[lcv]->flags &= ~(PG_BUSY); /* un-busy! */
+ UVM_PAGE_OWN(pages[lcv], NULL);
+
+ /* done! */
+ } /* for "lcv" loop */
+ } /* "gotpages" != 0 */
+
+ /* note: object still _locked_ */
+ } else {
+
+ uobjpage = NULL;
+
+ }
+
+ /* locked (shadowed): maps(read), amap */
+ /* locked (!shadowed): maps(read), amap(if there),
+ uobj(if !null), uobjpage(if !null) */
+
+ /*
+ * note that at this point we are done with any front or back pages.
+ * we are now going to focus on the center page (i.e. the one we've
+ * faulted on). if we have faulted on the top (anon) layer
+ * [i.e. case 1], then the anon we want is anons[centeridx] (we have
+ * not touched it yet). if we have faulted on the bottom (uobj)
+ * layer [i.e. case 2] and the page was both present and available,
+ * then we've got a pointer to it as "uobjpage" and we've already
+ * made it BUSY.
+ */
+
+ /*
+ * there are four possible cases we must address: 1A, 1B, 2A, and 2B
+ */
+
+ /*
+ * redirect case 2: if we are not shadowed, go to case 2.
+ */
+
+ if (shadowed == FALSE)
+ goto Case2;
+
+ /* locked: maps(read), amap */
+
+ /*
+ * handle case 1: fault on an anon in our amap
+ */
+
+ anon = anons[centeridx];
+ UVMHIST_LOG(maphist, " case 1 fault: anon=0x%x", anon, 0,0,0);
+ simple_lock(&anon->an_lock);
+
+ /* locked: maps(read), amap, anon */
+
+ /*
+ * no matter if we have case 1A or case 1B we are going to need to
+ * have the anon's memory resident. ensure that now.
+ */
+
+ /*
+ * let uvmfault_anonget do the dirty work. if it fails (!OK) it will
+ * unlock for us. if it is OK, locks are still valid and locked.
+ * also, if it is OK, then the anon's page is on the queues.
+ * if the page is on loan from a uvm_object, then anonget will
+ * lock that object for us if it does not fail.
+ */
+
+ result = uvmfault_anonget(&ufi, amap, anon);
+
+ if (result == VM_PAGER_REFAULT)
+ goto ReFault;
+
+ if (result == VM_PAGER_AGAIN) {
+ tsleep((caddr_t)&lbolt, PVM, "fltagain1", 0);
+ goto ReFault;
+ }
+
+ if (result != VM_PAGER_OK)
+ return (KERN_PROTECTION_FAILURE); /* XXX??? */
+
+ /*
+ * uobj is non null if the page is on loan from an object (i.e. uobj)
+ */
+
+ uobj = anon->u.an_page->uobject; /* locked by anonget if !NULL */
+
+ /* locked: maps(read), amap, anon, uobj(if one) */
+
+ /*
+ * special handling for loaned pages
+ */
+ if (anon->u.an_page->loan_count) {
+
+ if ((access_type & VM_PROT_WRITE) == 0) {
+
+ /*
+ * for read faults on loaned pages we just cap the
+ * protection at read-only.
+ */
+
+ enter_prot = enter_prot & ~VM_PROT_WRITE;
+
+ } else {
+ /*
+ * note that we can't allow writes into a loaned page!
+ *
+ * if we have a write fault on a loaned page in an
+ * anon then we need to look at the anon's ref count.
+ * if it is greater than one then we are going to do
+ * a normal copy-on-write fault into a new anon (this
+ * is not a problem). however, if the reference count
+ * is one (a case where we would normally allow a
+ * write directly to the page) then we need to kill
+ * the loan before we continue.
+ */
+
+ /* >1 case is already ok */
+ if (anon->an_ref == 1) {
+
+ /* get new un-owned replacement page */
+ pg = uvm_pagealloc(NULL, 0, NULL);
+ if (pg == NULL) {
+ uvmfault_unlockall(&ufi, amap, uobj,
+ anon);
+ uvm_wait("flt_noram2");
+ goto ReFault;
+ }
+
+ /*
+ * copy data, kill loan, and drop uobj lock
+ * (if any)
+ */
+ /* copy old -> new */
+ uvm_pagecopy(anon->u.an_page, pg);
+
+ /* force reload */
+ pmap_page_protect(PMAP_PGARG(anon->u.an_page),
+ VM_PROT_NONE);
+ uvm_lock_pageq(); /* KILL loan */
+ if (uobj)
+ /* if we were loaning */
+ anon->u.an_page->loan_count--;
+ anon->u.an_page->uanon = NULL;
+ /* in case we owned */
+ anon->u.an_page->pqflags &= ~PQ_ANON;
+ uvm_unlock_pageq();
+ if (uobj) {
+ simple_unlock(&uobj->vmobjlock);
+ uobj = NULL;
+ }
+
+ /* install new page in anon */
+ anon->u.an_page = pg;
+ pg->uanon = anon;
+ pg->pqflags |= PQ_ANON;
+ pg->flags &= ~(PG_BUSY|PG_FAKE);
+ UVM_PAGE_OWN(pg, NULL);
+
+ /* done! */
+ } /* ref == 1 */
+ } /* write fault */
+ } /* loan count */
+
+ /*
+ * if we are case 1B then we will need to allocate a new blank
+ * anon to transfer the data into. note that we have a lock
+ * on anon, so no one can busy or release the page until we are done.
+ * also note that the ref count can't drop to zero here because
+ * it is > 1 and we are only dropping one ref.
+ *
+ * in the (hopefully very rare) case that we are out of RAM we
+ * will unlock, wait for more RAM, and refault.
+ *
+ * if we are out of anon VM we kill the process (XXX: could wait?).
+ */
+
+ if ((access_type & VM_PROT_WRITE) != 0 && anon->an_ref > 1) {
+
+ UVMHIST_LOG(maphist, " case 1B: COW fault",0,0,0,0);
+ uvmexp.flt_acow++;
+ oanon = anon; /* oanon = old, locked anon */
+ anon = uvm_analloc();
+ if (anon)
+ pg = uvm_pagealloc(NULL, 0, anon);
+#ifdef __GNUC__
+ else
+ pg = NULL; /* XXX: gcc */
+#endif
+
+ /* check for out of RAM */
+ if (anon == NULL || pg == NULL) {
+ if (anon)
+ uvm_anfree(anon);
+ uvmfault_unlockall(&ufi, amap, uobj, oanon);
+ if (anon == NULL) {
+ UVMHIST_LOG(maphist,
+ "<- failed. out of VM",0,0,0,0);
+ uvmexp.fltnoanon++;
+ /* XXX: OUT OF VM, ??? */
+ return (KERN_RESOURCE_SHORTAGE);
+ }
+ uvmexp.fltnoram++;
+ uvm_wait("flt_noram3"); /* out of RAM, wait for more */
+ goto ReFault;
+ }
+
+ /* got all resources, replace anon with nanon */
+
+ uvm_pagecopy(oanon->u.an_page, pg); /* pg now !PG_CLEAN */
+ pg->flags &= ~(PG_BUSY|PG_FAKE); /* un-busy! new page */
+ UVM_PAGE_OWN(pg, NULL);
+ amap_add(&ufi.entry->aref, ufi.orig_rvaddr - ufi.entry->start,
+ anon, 1);
+
+ /* deref: can not drop to zero here by defn! */
+ oanon->an_ref--;
+
+ /*
+ * note: oanon still locked. anon is _not_ locked, but we
+ * have the sole reference to it from the amap, which _is_ locked.
+ * thus, no one can get at it until we are done with it.
+ */
+
+ } else {
+
+ uvmexp.flt_anon++;
+ oanon = anon; /* old, locked anon is same as anon */
+ pg = anon->u.an_page;
+ if (anon->an_ref > 1) /* disallow writes to ref > 1 anons */
+ enter_prot = enter_prot & ~VM_PROT_WRITE;
+
+ }
+
+ /* locked: maps(read), amap, anon */
+
+ /*
+ * now map the page in ...
+ * XXX: old fault unlocks object before pmap_enter. this seems
+ * suspect since some other thread could blast the page out from
+ * under us between the unlock and the pmap_enter.
+ */
+
+ UVMHIST_LOG(maphist, " MAPPING: anon: pm=0x%x, va=0x%x, pg=0x%x",
+ ufi.orig_map->pmap, ufi.orig_rvaddr, pg, 0);
+ pmap_enter(ufi.orig_map->pmap, ufi.orig_rvaddr, VM_PAGE_TO_PHYS(pg),
+ enter_prot, wired);
+
+ /*
+ * ... and update the page queues.
+ */
+
+ uvm_lock_pageq();
+
+ if (fault_type == VM_FAULT_WIRE) {
+ uvm_pagewire(pg);
+ } else {
+ /* activate it */
+ uvm_pageactivate(pg);
+
+ }
+
+ uvm_unlock_pageq();
+
+ /*
+ * done case 1! finish up by unlocking everything and returning success
+ */
+
+ uvmfault_unlockall(&ufi, amap, uobj, oanon);
+ return (KERN_SUCCESS);
+
+
+Case2:
+ /*
+ * handle case 2: faulting on backing object or zero fill
+ */
+
+ /*
+ * locked:
+ * maps(read), amap(if there), uobj(if !null), uobjpage(if !null)
+ */
+
+ /*
+ * note that uobjpage can not be PGO_DONTCARE at this point. we now
+ * set uobjpage to PGO_DONTCARE if we are doing a zero fill. if we
+ * have a backing object, check and see if we are going to promote
+ * the data up to an anon during the fault.
+ */
+
+ if (uobj == NULL) {
+ uobjpage = PGO_DONTCARE;
+ promote = TRUE; /* always need anon here */
+ } else {
+ /* assert(uobjpage != PGO_DONTCARE) */
+ promote = (access_type & VM_PROT_WRITE) &&
+ UVM_ET_ISCOPYONWRITE(ufi.entry);
+ }
+ UVMHIST_LOG(maphist, " case 2 fault: promote=%d, zfill=%d",
+ promote, (uobj == NULL), 0,0);
+
+ /*
+ * if uobjpage is not null then we do not need to do I/O to get the
+ * uobjpage.
+ *
+ * if uobjpage is null, then we need to unlock and ask the pager to
+ * get the data for us. once we have the data, we need to reverify
+ * the state of the world. we are currently not holding any resources.
+ */
+
+ if (uobjpage) {
+ /* update rusage counters */
+ curproc->p_addr->u_stats.p_ru.ru_minflt++;
+ } else {
+ /* update rusage counters */
+ curproc->p_addr->u_stats.p_ru.ru_majflt++;
+
+ /* locked: maps(read), amap(if there), uobj */
+ uvmfault_unlockall(&ufi, amap, NULL, NULL);
+ /* locked: uobj */
+
+ uvmexp.fltget++;
+ gotpages = 1;
+ result = uobj->pgops->pgo_get(uobj,
+ (ufi.orig_rvaddr - ufi.entry->start) + ufi.entry->offset,
+ &uobjpage, &gotpages, 0,
+ UVM_ET_ISCOPYONWRITE(ufi.entry) ?
+ VM_PROT_READ : access_type,
+ ufi.entry->advice, 0);
+
+ /* locked: uobjpage(if result OK) */
+
+ /*
+ * recover from I/O
+ */
+
+ if (result != VM_PAGER_OK) {
+
+#ifdef DIAGNOSTIC
+ if (result == VM_PAGER_PEND)
+ panic("uvm_fault: pgo_get got PENDing on non-async I/O");
+#endif
+
+ if (result == VM_PAGER_AGAIN) {
+ UVMHIST_LOG(maphist, " pgo_get says TRY AGAIN!",0,0,0,0);
+ tsleep((caddr_t)&lbolt, PVM, "fltagain2", 0);
+ goto ReFault;
+ }
+
+ UVMHIST_LOG(maphist, "<- pgo_get failed (code %d)",
+ result, 0,0,0);
+ return (KERN_PROTECTION_FAILURE); /* XXX i/o error */
+ }
+
+ /* locked: uobjpage */
+
+ /*
+ * re-verify the state of the world by first trying to relock
+ * the maps. always relock the object.
+ */
+
+ locked = uvmfault_relock(&ufi);
+ if (locked && amap)
+ amap_lock(amap);
+ simple_lock(&uobj->vmobjlock);
+
+ /* locked(locked): maps(read), amap(if !null), uobj, uobjpage */
+ /* locked(!locked): uobj, uobjpage */
+
+ /*
+ * verify that the page has not been released and re-verify
+ * that amap slot is still free. if there is a problem,
+ * we unlock and clean up.
+ */
+
+ if ((uobjpage->flags & PG_RELEASED) != 0 ||
+ (locked && amap &&
+ amap_lookup(&ufi.entry->aref,
+ ufi.orig_rvaddr - ufi.entry->start))) {
+ if (locked)
+ uvmfault_unlockall(&ufi, amap, NULL, NULL);
+ locked = FALSE;
+ }
+
+ /*
+ * didn't get the lock? release the page and retry.
+ */
+
+ if (locked == FALSE) {
+
+ UVMHIST_LOG(maphist,
+ " wasn't able to relock after fault: retry",
+ 0,0,0,0);
+ if (uobjpage->flags & PG_WANTED)
+ /* still holding object lock */
+ thread_wakeup(uobjpage);
+
+ if (uobjpage->flags & PG_RELEASED) {
+ uvmexp.fltpgrele++;
+#ifdef DIAGNOSTIC
+ if (uobj->pgops->pgo_releasepg == NULL)
+ panic("uvm_fault: object has no releasepg function");
+#endif
+ /* frees page */
+ if (uobj->pgops->pgo_releasepg(uobjpage,NULL))
+ /* unlock if still alive */
+ simple_unlock(&uobj->vmobjlock);
+ goto ReFault;
+ }
+
+ uvm_lock_pageq();
+ /* make sure it is in queues */
+ uvm_pageactivate(uobjpage);
+
+ uvm_unlock_pageq();
+ uobjpage->flags &= ~(PG_BUSY|PG_WANTED);
+ UVM_PAGE_OWN(uobjpage, NULL);
+ simple_unlock(&uobj->vmobjlock);
+ goto ReFault;
+
+ }
+
+ /*
+ * we have the data in uobjpage which is PG_BUSY and
+ * !PG_RELEASED. we are holding object lock (so the page
+ * can't be released on us).
+ */
+
+ /* locked: maps(read), amap(if !null), uobj, uobjpage */
+
+ }
+
+ /*
+ * locked:
+ * maps(read), amap(if !null), uobj(if !null), uobjpage(if uobj)
+ */
+
+ /*
+ * notes:
+ * - at this point uobjpage can not be NULL
+ * - at this point uobjpage can not be PG_RELEASED (since we checked
+ * for it above)
+ * - at this point uobjpage could be PG_WANTED (handle later)
+ */
+
+ if (promote == FALSE) {
+
+ /*
+ * we are not promoting. if the mapping is COW ensure that we
+ * don't give more access than we should (e.g. when doing a read
+ * fault on a COPYONWRITE mapping we want to map the COW page in
+ * R/O even though the entry protection could be R/W).
+ *
+ * set "pg" to the page we want to map in (uobjpage, usually)
+ */
+
+ uvmexp.flt_obj++;
+ if (UVM_ET_ISCOPYONWRITE(ufi.entry))
+ enter_prot = enter_prot & ~VM_PROT_WRITE;
+ pg = uobjpage; /* map in the actual object */
+
+ /* assert(uobjpage != PGO_DONTCARE) */
+
+ /*
+ * we are faulting directly on the page. be careful
+ * about writing to loaned pages...
+ */
+ if (uobjpage->loan_count) {
+
+ if ((access_type & VM_PROT_WRITE) == 0) {
+ /* read fault: cap the protection at readonly */
+ /* cap! */
+ enter_prot = enter_prot & ~VM_PROT_WRITE;
+ } else {
+ /* write fault: must break the loan here */
+
+ /* alloc new un-owned page */
+ pg = uvm_pagealloc(NULL, 0, NULL);
+
+ if (pg == NULL) {
+ /*
+ * drop ownership of page, it can't
+ * be released
+					 */
+ if (uobjpage->flags & PG_WANTED)
+ thread_wakeup(uobjpage);
+ uobjpage->flags &= ~(PG_BUSY|PG_WANTED);
+ UVM_PAGE_OWN(uobjpage, NULL);
+
+ uvm_lock_pageq();
+ /* activate: we will need it later */
+ uvm_pageactivate(uobjpage);
+
+ uvm_unlock_pageq();
+ uvmfault_unlockall(&ufi, amap, uobj,
+ NULL);
+ UVMHIST_LOG(maphist,
+ " out of RAM breaking loan, waiting", 0,0,0,0);
+ uvmexp.fltnoram++;
+ uvm_wait("flt_noram4");
+ goto ReFault;
+ }
+
+ /*
+ * copy the data from the old page to the new
+ * one and clear the fake/clean flags on the
+ * new page (keep it busy). force a reload
+ * of the old page by clearing it from all
+ * pmaps. then lock the page queues to
+ * rename the pages.
+ */
+ uvm_pagecopy(uobjpage, pg); /* old -> new */
+ pg->flags &= ~(PG_FAKE|PG_CLEAN);
+ pmap_page_protect(PMAP_PGARG(uobjpage),
+ VM_PROT_NONE);
+ if (uobjpage->flags & PG_WANTED)
+ thread_wakeup(uobjpage);
+ /* uobj still locked */
+ uobjpage->flags &= ~(PG_WANTED|PG_BUSY);
+ UVM_PAGE_OWN(uobjpage, NULL);
+
+ uvm_lock_pageq();
+ offset = uobjpage->offset;
+ /* remove old page */
+ uvm_pagerealloc(uobjpage, NULL, 0);
+
+ /*
+ * at this point we have absolutely no
+ * control over uobjpage
+ */
+ /* install new page */
+ uvm_pagerealloc(pg, uobj, offset);
+ uvm_unlock_pageq();
+
+ /*
+ * done! loan is broken and "pg" is
+ * PG_BUSY. it can now replace uobjpage.
+ */
+
+ uobjpage = pg;
+
+ } /* write fault case */
+ } /* if loan_count */
+
+ } else {
+
+ /*
+ * if we are going to promote the data to an anon we
+ * allocate a blank anon here and plug it into our amap.
+ */
+#ifdef DIAGNOSTIC
+ if (amap == NULL)
+ panic("uvm_fault: want to promote data, but no anon");
+#endif
+
+ anon = uvm_analloc();
+ if (anon)
+ pg = uvm_pagealloc(NULL, 0, anon); /* BUSY+CLEAN+FAKE */
+#ifdef __GNUC__
+ else
+ pg = NULL; /* XXX: gcc */
+#endif
+
+ /*
+ * out of memory resources?
+ */
+ if (anon == NULL || pg == NULL) {
+
+ /*
+ * arg! must unbusy our page and fail or sleep.
+ */
+ if (uobjpage != PGO_DONTCARE) {
+ if (uobjpage->flags & PG_WANTED)
+ /* still holding object lock */
+ thread_wakeup(uobjpage);
+
+ uvm_lock_pageq();
+ /* make sure it is in queues */
+ uvm_pageactivate(uobjpage);
+ uvm_unlock_pageq();
+ /* un-busy! (still locked) */
+ uobjpage->flags &= ~(PG_BUSY|PG_WANTED);
+ UVM_PAGE_OWN(uobjpage, NULL);
+ }
+
+ /* unlock and fail ... */
+ uvmfault_unlockall(&ufi, amap, uobj, NULL);
+ if (anon == NULL) {
+ UVMHIST_LOG(maphist, " promote: out of VM",
+ 0,0,0,0);
+ uvmexp.fltnoanon++;
+ /* XXX: out of VM */
+ return (KERN_RESOURCE_SHORTAGE);
+ }
+ UVMHIST_LOG(maphist, " out of RAM, waiting for more",
+ 0,0,0,0);
+ uvm_anfree(anon);
+ uvmexp.fltnoram++;
+ uvm_wait("flt_noram5");
+ goto ReFault;
+ }
+
+ /*
+ * fill in the data
+ */
+
+ if (uobjpage != PGO_DONTCARE) {
+ uvmexp.flt_prcopy++;
+ /* copy page [pg now dirty] */
+ uvm_pagecopy(uobjpage, pg);
+
+ /*
+ * promote to shared amap? make sure all sharing
+ * procs see it
+ */
+ if ((amap_flags(amap) & AMAP_SHARED) != 0) {
+ pmap_page_protect(PMAP_PGARG(uobjpage),
+ VM_PROT_NONE);
+ }
+
+ /*
+ * dispose of uobjpage. it can't be PG_RELEASED
+ * since we still hold the object lock. drop
+ * handle to uobj as well.
+ */
+
+ if (uobjpage->flags & PG_WANTED)
+ /* still have the obj lock */
+ thread_wakeup(uobjpage);
+ uobjpage->flags &= ~(PG_BUSY|PG_WANTED);
+ UVM_PAGE_OWN(uobjpage, NULL);
+ uvm_lock_pageq();
+ uvm_pageactivate(uobjpage); /* put it back */
+ uvm_unlock_pageq();
+ simple_unlock(&uobj->vmobjlock);
+ uobj = NULL;
+ UVMHIST_LOG(maphist,
+ " promote uobjpage 0x%x to anon/page 0x%x/0x%x",
+ uobjpage, anon, pg, 0);
+
+ } else {
+ uvmexp.flt_przero++;
+ uvm_pagezero(pg); /* zero page [pg now dirty] */
+ UVMHIST_LOG(maphist," zero fill anon/page 0x%x/0%x",
+ anon, pg, 0, 0);
+ }
+
+ amap_add(&ufi.entry->aref, ufi.orig_rvaddr - ufi.entry->start,
+ anon, 0);
+
+ }
+
+ /*
+ * locked:
+ * maps(read), amap(if !null), uobj(if !null), uobjpage(if uobj)
+ *
+ * note: pg is either the uobjpage or the new page in the new anon
+ */
+
+ /*
+ * all resources are present. we can now map it in and free our
+ * resources.
+ */
+
+ UVMHIST_LOG(maphist,
+ " MAPPING: case2: pm=0x%x, va=0x%x, pg=0x%x, promote=%d",
+ ufi.orig_map->pmap, ufi.orig_rvaddr, pg, promote);
+ pmap_enter(ufi.orig_map->pmap, ufi.orig_rvaddr, VM_PAGE_TO_PHYS(pg),
+ enter_prot, wired);
+
+ uvm_lock_pageq();
+
+ if (fault_type == VM_FAULT_WIRE) {
+ uvm_pagewire(pg);
+ } else {
+
+ /* activate it */
+ uvm_pageactivate(pg);
+
+ }
+
+ uvm_unlock_pageq();
+
+ if (pg->flags & PG_WANTED)
+ thread_wakeup(pg); /* lock still held */
+
+ /*
+ * note that pg can't be PG_RELEASED since we did not drop the object
+ * lock since the last time we checked.
+ */
+
+ pg->flags &= ~(PG_BUSY|PG_FAKE|PG_WANTED);
+ UVM_PAGE_OWN(pg, NULL);
+ uvmfault_unlockall(&ufi, amap, uobj, NULL);
+
+ UVMHIST_LOG(maphist, "<- done (SUCCESS!)",0,0,0,0);
+ return (KERN_SUCCESS);
+}
+
+
+/*
+ * uvm_fault_wire: wire down a range of virtual addresses in a map.
+ *
+ * => map should be locked by caller? If so how can we call
+ * uvm_fault? WRONG.
+ * => XXXCDC: locking here is all screwed up!!! start with
+ * uvm_map_pageable and fix it.
+ */
+
+int
+uvm_fault_wire(map, start, end)
+ vm_map_t map;
+ vaddr_t start, end;
+{
+ vaddr_t va;
+ pmap_t pmap;
+ int rv;
+
+ pmap = vm_map_pmap(map);
+
+ /*
+	 * call pmap_pageable: this tells the pmap layer to lock down these
+ * page tables.
+ */
+
+ pmap_pageable(pmap, start, end, FALSE);
+
+ /*
+	 * now fault it in a page at a time.  if the fault fails then we have
+ * to undo what we have done. note that in uvm_fault VM_PROT_NONE
+ * is replaced with the max protection if fault_type is VM_FAULT_WIRE.
+ */
+
+ for (va = start ; va < end ; va += PAGE_SIZE) {
+ rv = uvm_fault(map, va, VM_FAULT_WIRE, VM_PROT_NONE);
+ if (rv) {
+ if (va != start) {
+ uvm_fault_unwire(map->pmap, start, va);
+ }
+ return (rv);
+ }
+ }
+
+ return (KERN_SUCCESS);
+}
+
+/*
+ * uvm_fault_unwire(): unwire range of virtual space.
+ *
+ * => caller holds reference to pmap (via its map)
+ */
+
+void
+uvm_fault_unwire(pmap, start, end)
+ struct pmap *pmap;
+ vaddr_t start, end;
+{
+ vaddr_t va;
+ paddr_t pa;
+ struct vm_page *pg;
+
+ /*
+ * we assume that the area we are unwiring has actually been wired
+ * in the first place. this means that we should be able to extract
+ * the PAs from the pmap. we also lock out the page daemon so that
+ * we can call uvm_pageunwire.
+ */
+
+ uvm_lock_pageq();
+
+ for (va = start; va < end ; va += PAGE_SIZE) {
+ pa = pmap_extract(pmap, va);
+
+ /* XXX: assumes PA 0 cannot be in map */
+ if (pa == (paddr_t) 0) {
+ panic("uvm_fault_unwire: unwiring non-wired memory");
+ }
+ pmap_change_wiring(pmap, va, FALSE); /* tell the pmap */
+ pg = PHYS_TO_VM_PAGE(pa);
+ if (pg)
+ uvm_pageunwire(pg);
+ }
+
+ uvm_unlock_pageq();
+
+ /*
+ * now we call pmap_pageable to let the pmap know that the page tables
+ * in this space no longer need to be wired.
+ */
+
+ pmap_pageable(pmap, start, end, TRUE);
+
+}
diff --git a/sys/uvm/uvm_fault.h b/sys/uvm/uvm_fault.h
new file mode 100644
index 00000000000..650543ea669
--- /dev/null
+++ b/sys/uvm/uvm_fault.h
@@ -0,0 +1,88 @@
+/* $NetBSD: uvm_fault.h,v 1.7 1998/10/11 23:07:42 chuck Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ *
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * from: Id: uvm_fault.h,v 1.1.2.2 1997/12/08 16:07:12 chuck Exp
+ */
+
+#ifndef _UVM_UVM_FAULT_H_
+#define _UVM_UVM_FAULT_H_
+
+/*
+ * fault types
+ */
+
+#define VM_FAULT_INVALID ((vm_fault_t) 0x0) /* invalid mapping */
+#define VM_FAULT_PROTECT ((vm_fault_t) 0x1) /* protection */
+#define VM_FAULT_WIRE ((vm_fault_t) 0x2) /* wire mapping */
+
+/*
+ * fault data structures
+ */
+
+/*
+ * uvm_faultinfo: to load one of these, fill in all orig_* fields and
+ * then call uvmfault_lookup on it.
+ */
+
+
+struct uvm_faultinfo {
+ vm_map_t orig_map; /* IN: original map */
+ vaddr_t orig_rvaddr; /* IN: original rounded VA */
+ vsize_t orig_size; /* IN: original size of interest */
+ vm_map_t map; /* map (could be a submap) */
+ unsigned int mapv; /* map's version number */
+ vm_map_entry_t entry; /* map entry (from 'map') */
+ vsize_t size; /* size of interest */
+};
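+
+/*
+ * illustrative usage sketch (cf. uvm_fault() in uvm_fault.c; "map" and
+ * "vaddr" stand for the caller's own arguments): a fault routine loads
+ * the IN fields and then performs the lookup, roughly:
+ *
+ *	struct uvm_faultinfo ufi;
+ *
+ *	ufi.orig_map = map;
+ *	ufi.orig_rvaddr = trunc_page(vaddr);
+ *	ufi.orig_size = PAGE_SIZE;
+ *	if (uvmfault_lookup(&ufi, FALSE) == FALSE)
+ *		return (KERN_INVALID_ADDRESS);
+ *	... on success ufi.map and ufi.entry are valid and locked ...
+ */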
+
+/*
+ * fault prototypes
+ */
+
+
+int uvmfault_anonget __P((struct uvm_faultinfo *, struct vm_amap *,
+ struct vm_anon *));
+static boolean_t uvmfault_lookup __P((struct uvm_faultinfo *, boolean_t));
+static boolean_t uvmfault_relock __P((struct uvm_faultinfo *));
+static void uvmfault_unlockall __P((struct uvm_faultinfo *, struct vm_amap *,
+ struct uvm_object *, struct vm_anon *));
+static void uvmfault_unlockmaps __P((struct uvm_faultinfo *, boolean_t));
+
+int uvm_fault_wire __P((vm_map_t, vaddr_t, vaddr_t));
+void uvm_fault_unwire __P((struct pmap *, vaddr_t, vaddr_t));
+
+#endif /* _UVM_UVM_FAULT_H_ */
diff --git a/sys/uvm/uvm_fault_i.h b/sys/uvm/uvm_fault_i.h
new file mode 100644
index 00000000000..40c5cddcef8
--- /dev/null
+++ b/sys/uvm/uvm_fault_i.h
@@ -0,0 +1,203 @@
+/* $NetBSD: uvm_fault_i.h,v 1.7 1999/01/24 23:53:15 chuck Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ *
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * from: Id: uvm_fault_i.h,v 1.1.6.1 1997/12/08 16:07:12 chuck Exp
+ */
+
+#ifndef _UVM_UVM_FAULT_I_H_
+#define _UVM_UVM_FAULT_I_H_
+
+/*
+ * uvm_fault_i.h: fault inline functions
+ */
+
+/*
+ * uvmfault_unlockmaps: unlock the maps
+ */
+
+static __inline void
+uvmfault_unlockmaps(ufi, write_locked)
+ struct uvm_faultinfo *ufi;
+ boolean_t write_locked;
+{
+
+ if (write_locked) {
+ vm_map_unlock(ufi->map);
+ } else {
+ vm_map_unlock_read(ufi->map);
+ }
+}
+
+/*
+ * uvmfault_unlockall: unlock everything passed in.
+ *
+ * => maps must be read-locked (not write-locked).
+ */
+
+static __inline void
+uvmfault_unlockall(ufi, amap, uobj, anon)
+ struct uvm_faultinfo *ufi;
+ struct vm_amap *amap;
+ struct uvm_object *uobj;
+ struct vm_anon *anon;
+{
+
+ if (anon)
+ simple_unlock(&anon->an_lock);
+ if (uobj)
+ simple_unlock(&uobj->vmobjlock);
+ if (amap)
+ amap_unlock(amap);
+ uvmfault_unlockmaps(ufi, FALSE);
+}
+
+/*
+ * uvmfault_lookup: lookup a virtual address in a map
+ *
+ * => caller must provide a uvm_faultinfo structure with the IN
+ * params properly filled in
+ * => we will lookup the map entry (handling submaps) as we go
+ * => if the lookup is a success we will return with the maps locked
+ * => if "write_lock" is TRUE, we write_lock the map, otherwise we only
+ * get a read lock.
+ * => note that submaps can only appear in the kernel and they are
+ * required to use the same virtual addresses as the map they
+ * are referenced by (thus address translation between the main
+ * map and the submap is unnecessary).
+ */
+
+static __inline boolean_t
+uvmfault_lookup(ufi, write_lock)
+ struct uvm_faultinfo *ufi;
+ boolean_t write_lock;
+{
+ vm_map_t tmpmap;
+
+ /*
+ * init ufi values for lookup.
+ */
+
+ ufi->map = ufi->orig_map;
+ ufi->size = ufi->orig_size;
+
+ /*
+ * keep going down levels until we are done. note that there can
+ * only be two levels so we won't loop very long.
+ */
+
+ while (1) {
+
+ /*
+ * lock map
+ */
+ if (write_lock) {
+ vm_map_lock(ufi->map);
+ } else {
+ vm_map_lock_read(ufi->map);
+ }
+
+ /*
+ * lookup
+ */
+ if (!uvm_map_lookup_entry(ufi->map, ufi->orig_rvaddr,
+ &ufi->entry)) {
+ uvmfault_unlockmaps(ufi, write_lock);
+ return(FALSE);
+ }
+
+ /*
+ * reduce size if necessary
+ */
+ if (ufi->entry->end - ufi->orig_rvaddr < ufi->size)
+ ufi->size = ufi->entry->end - ufi->orig_rvaddr;
+
+ /*
+ * submap? replace map with the submap and lookup again.
+ * note: VAs in submaps must match VAs in main map.
+ */
+ if (UVM_ET_ISSUBMAP(ufi->entry)) {
+ tmpmap = ufi->entry->object.sub_map;
+ if (write_lock) {
+ vm_map_unlock(ufi->map);
+ } else {
+ vm_map_unlock_read(ufi->map);
+ }
+ ufi->map = tmpmap;
+ continue;
+ }
+
+ /*
+ * got it!
+ */
+
+ ufi->mapv = ufi->map->timestamp;
+ return(TRUE);
+
+ } /* while loop */
+
+ /*NOTREACHED*/
+}
+
+/*
+ * uvmfault_relock: attempt to relock the same version of the map
+ *
+ * => fault data structures should be unlocked before calling.
+ * => on success (TRUE) the maps will be locked after the call.
+ */
+
+static __inline boolean_t
+uvmfault_relock(ufi)
+ struct uvm_faultinfo *ufi;
+{
+
+ uvmexp.fltrelck++;
+ /*
+ * relock map. fail if version mismatch (in which case nothing
+ * gets locked).
+ */
+
+ vm_map_lock_read(ufi->map);
+ if (ufi->mapv != ufi->map->timestamp) {
+ vm_map_unlock_read(ufi->map);
+ return(FALSE);
+ }
+
+ uvmexp.fltrelckok++;
+ return(TRUE); /* got it! */
+}
+
+#endif /* _UVM_UVM_FAULT_I_H_ */
diff --git a/sys/uvm/uvm_glue.c b/sys/uvm/uvm_glue.c
new file mode 100644
index 00000000000..b46fd012c16
--- /dev/null
+++ b/sys/uvm/uvm_glue.c
@@ -0,0 +1,605 @@
+/* $NetBSD: uvm_glue.c,v 1.15 1998/10/19 22:21:19 tron Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * Copyright (c) 1991, 1993, The Regents of the University of California.
+ *
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor,
+ * Washington University, the University of California, Berkeley and
+ * its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_glue.c 8.6 (Berkeley) 1/5/94
+ * from: Id: uvm_glue.c,v 1.1.2.8 1998/02/07 01:16:54 chs Exp
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * uvm_glue.c: glue functions
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+#include <sys/buf.h>
+#include <sys/user.h>
+#ifdef SYSVSHM
+#include <sys/shm.h>
+#endif
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_kern.h>
+
+#include <uvm/uvm.h>
+
+#include <machine/cpu.h>
+
+/*
+ * local prototypes
+ */
+
+static void uvm_swapout __P((struct proc *));
+
+/*
+ * XXXCDC: do these really belong here?
+ */
+
+unsigned maxdmap = MAXDSIZ; /* kern_resource.c: RLIMIT_DATA max */
+unsigned maxsmap = MAXSSIZ; /* kern_resource.c: RLIMIT_STACK max */
+
+int readbuffers = 0; /* allow KGDB to read kern buffer pool */
+ /* XXX: see uvm_kernacc */
+
+
+/*
+ * uvm_kernacc: can the kernel access a region of memory
+ *
+ * - called from malloc [DIAGNOSTIC], and /dev/kmem driver (mem.c)
+ */
+
+boolean_t
+uvm_kernacc(addr, len, rw)
+ caddr_t addr;
+ size_t len;
+ int rw;
+{
+ boolean_t rv;
+ vaddr_t saddr, eaddr;
+ vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;
+
+ saddr = trunc_page(addr);
+ eaddr = round_page(addr+len);
+ vm_map_lock_read(kernel_map);
+ rv = uvm_map_checkprot(kernel_map, saddr, eaddr, prot);
+ vm_map_unlock_read(kernel_map);
+
+ /*
+ * XXX there are still some things (e.g. the buffer cache) that
+ * are managed behind the VM system's back so even though an
+ * address is accessible in the mind of the VM system, there may
+	 * not be physical pages where the VM thinks there are.  This can
+ * lead to bogus allocation of pages in the kernel address space
+ * or worse, inconsistencies at the pmap level. We only worry
+ * about the buffer cache for now.
+ */
+ if (!readbuffers && rv && (eaddr > (vaddr_t)buffers &&
+ saddr < (vaddr_t)buffers + MAXBSIZE * nbuf))
+ rv = FALSE;
+ return(rv);
+}
+
+/*
+ * uvm_useracc: can the user access it?
+ *
+ * - called from physio() and sys___sysctl().
+ */
+
+boolean_t
+uvm_useracc(addr, len, rw)
+ caddr_t addr;
+ size_t len;
+ int rw;
+{
+ boolean_t rv;
+ vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;
+
+#if defined(i386) || defined(pc532)
+ /*
+ * XXX - specially disallow access to user page tables - they are
+ * in the map. This is here until i386 & pc532 pmaps are fixed...
+ */
+ if ((vaddr_t) addr >= VM_MAXUSER_ADDRESS
+ || (vaddr_t) addr + len > VM_MAXUSER_ADDRESS
+ || (vaddr_t) addr + len <= (vaddr_t) addr)
+ return (FALSE);
+#endif
+
+ rv = uvm_map_checkprot(&curproc->p_vmspace->vm_map,
+ trunc_page(addr), round_page(addr+len), prot);
+ return(rv);
+}
+
+#ifdef KGDB
+/*
+ * Change protections on kernel pages from addr to addr+len
+ * (presumably so debugger can plant a breakpoint).
+ *
+ * We force the protection change at the pmap level. If we were
+ * to use vm_map_protect a change to allow writing would be lazily-
+ * applied meaning we would still take a protection fault, something
+ * we really don't want to do. It would also fragment the kernel
+ * map unnecessarily. We cannot use pmap_protect since it also won't
+ * enforce a write-enable request. Using pmap_enter is the only way
+ * we can ensure the change takes place properly.
+ */
+void
+uvm_chgkprot(addr, len, rw)
+ register caddr_t addr;
+ size_t len;
+ int rw;
+{
+ vm_prot_t prot;
+ paddr_t pa;
+ vaddr_t sva, eva;
+
+ prot = rw == B_READ ? VM_PROT_READ : VM_PROT_READ|VM_PROT_WRITE;
+ eva = round_page(addr + len);
+ for (sva = trunc_page(addr); sva < eva; sva += PAGE_SIZE) {
+ /*
+ * Extract physical address for the page.
+ * We use a cheezy hack to differentiate physical
+ * page 0 from an invalid mapping, not that it
+ * really matters...
+ */
+ pa = pmap_extract(pmap_kernel(), sva|1);
+ if (pa == 0)
+ panic("chgkprot: invalid page");
+ pmap_enter(pmap_kernel(), sva, pa&~1, prot, TRUE);
+ }
+}
+#endif
+
+/*
+ * uvm_vslock: wire user memory for I/O
+ *
+ * - called from physio and sys___sysctl
+ * - XXXCDC: consider nuking this (or making it a macro?)
+ */
+
+void
+uvm_vslock(p, addr, len)
+ struct proc *p;
+ caddr_t addr;
+ size_t len;
+{
+ uvm_fault_wire(&p->p_vmspace->vm_map, trunc_page(addr),
+ round_page(addr+len));
+}
+
+/*
+ * uvm_vsunlock: unwire user memory wired for I/O
+ *
+ * - called from physio and sys___sysctl
+ * - XXXCDC: consider nuking this (or making it a macro?)
+ */
+
+void
+uvm_vsunlock(p, addr, len)
+ struct proc *p;
+ caddr_t addr;
+ size_t len;
+{
+ uvm_fault_unwire(p->p_vmspace->vm_map.pmap, trunc_page(addr),
+ round_page(addr+len));
+}
+
+/*
+ * uvm_fork: fork a virtual address space
+ *
+ * - the address space is copied as per parent map's inherit values
+ * - a new "user" structure is allocated for the child process
+ * [filled in by MD layer...]
+ * - NOTE: the kernel stack may be at a different location in the child
+ * process, and thus addresses of automatic variables may be invalid
+ * after cpu_fork returns in the child process. We do nothing here
+ * after cpu_fork returns.
+ * - XXXCDC: we need a way for this to return a failure value rather
+ * than just hang
+ */
+void
+uvm_fork(p1, p2, shared)
+ struct proc *p1, *p2;
+ boolean_t shared;
+{
+ struct user *up = p2->p_addr;
+ int rv;
+
+ if (shared == TRUE)
+ uvmspace_share(p1, p2); /* share vmspace */
+ else
+ p2->p_vmspace = uvmspace_fork(p1->p_vmspace); /* fork vmspace */
+
+ /*
+ * Wire down the U-area for the process, which contains the PCB
+ * and the kernel stack. Wired state is stored in p->p_flag's
+ * P_INMEM bit rather than in the vm_map_entry's wired count
+ * to prevent kernel_map fragmentation.
+ */
+ rv = uvm_fault_wire(kernel_map, (vaddr_t)up,
+ (vaddr_t)up + USPACE);
+ if (rv != KERN_SUCCESS)
+ panic("uvm_fork: uvm_fault_wire failed: %d", rv);
+
+ /*
+ * p_stats and p_sigacts currently point at fields in the user
+ * struct but not at &u, instead at p_addr. Copy p_sigacts and
+ * parts of p_stats; zero the rest of p_stats (statistics).
+ */
+ p2->p_stats = &up->u_stats;
+ p2->p_sigacts = &up->u_sigacts;
+ up->u_sigacts = *p1->p_sigacts;
+ bzero(&up->u_stats.pstat_startzero,
+ (unsigned) ((caddr_t)&up->u_stats.pstat_endzero -
+ (caddr_t)&up->u_stats.pstat_startzero));
+ bcopy(&p1->p_stats->pstat_startcopy, &up->u_stats.pstat_startcopy,
+ ((caddr_t)&up->u_stats.pstat_endcopy -
+ (caddr_t)&up->u_stats.pstat_startcopy));
+
+/*
+ * cpu_fork will copy and update the kernel stack and pcb, and make
+ * the child ready to run. The child will exit directly to user
+ * mode on its first time slice, and will not return here.
+ */
+ cpu_fork(p1, p2);
+}
+
+/*
+ * uvm_exit: exit a virtual address space
+ *
+ * - the process passed to us is a dead (pre-zombie) process; we
+ * are running on a different context now (the reaper).
+ * - we must run in a separate thread because freeing the vmspace
+ * of the dead process may block.
+ */
+void
+uvm_exit(p)
+ struct proc *p;
+{
+
+ uvmspace_free(p->p_vmspace);
+ uvm_km_free(kernel_map, (vaddr_t)p->p_addr, USPACE);
+}
+
+/*
+ * uvm_init_limits: init per-process VM limits
+ *
+ * - called for process 0 and then inherited by all others.
+ */
+void
+uvm_init_limits(p)
+ struct proc *p;
+{
+
+ /*
+ * Set up the initial limits on process VM. Set the maximum
+ * resident set size to be all of (reasonably) available memory.
+ * This causes any single, large process to start random page
+ * replacement once it fills memory.
+ */
+
+ p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;
+ p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ;
+ p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ;
+ p->p_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ;
+ p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(uvmexp.free);
+}
+
+#ifdef DEBUG
+int enableswap = 1;
+int swapdebug = 0;
+#define SDB_FOLLOW 1
+#define SDB_SWAPIN 2
+#define SDB_SWAPOUT 4
+#endif
+
+/*
+ * uvm_swapin: swap in a process's u-area.
+ */
+
+void
+uvm_swapin(p)
+ struct proc *p;
+{
+ vaddr_t addr;
+ int s;
+
+ addr = (vaddr_t)p->p_addr;
+ /* make P_INMEM true */
+ uvm_fault_wire(kernel_map, addr, addr + USPACE);
+
+ /*
+ * Some architectures need to be notified when the user area has
+ * moved to new physical page(s) (e.g. see mips/mips/vm_machdep.c).
+ */
+ cpu_swapin(p);
+ s = splstatclock();
+ if (p->p_stat == SRUN)
+ setrunqueue(p);
+ p->p_flag |= P_INMEM;
+ splx(s);
+ p->p_swtime = 0;
+ ++uvmexp.swapins;
+}
+
+/*
+ * uvm_scheduler: process zero main loop
+ *
+ * - attempt to swap in every swapped-out, runnable process in order of
+ * priority.
+ * - if not enough memory, wake the pagedaemon and let it clear space.
+ */
+
+void
+uvm_scheduler()
+{
+ register struct proc *p;
+ register int pri;
+ struct proc *pp;
+ int ppri;
+ UVMHIST_FUNC("uvm_scheduler"); UVMHIST_CALLED(maphist);
+
+loop:
+#ifdef DEBUG
+ while (!enableswap)
+ tsleep((caddr_t)&proc0, PVM, "noswap", 0);
+#endif
+ pp = NULL; /* process to choose */
+ ppri = INT_MIN; /* its priority */
+ for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
+
+ /* is it a runnable swapped out process? */
+ if (p->p_stat == SRUN && (p->p_flag & P_INMEM) == 0) {
+ pri = p->p_swtime + p->p_slptime -
+ (p->p_nice - NZERO) * 8;
+ if (pri > ppri) { /* higher priority? remember it. */
+ pp = p;
+ ppri = pri;
+ }
+ }
+ }
+
+#ifdef DEBUG
+ if (swapdebug & SDB_FOLLOW)
+ printf("scheduler: running, procp %p pri %d\n", pp, ppri);
+#endif
+ /*
+ * Nothing to do, back to sleep
+ */
+ if ((p = pp) == NULL) {
+ tsleep((caddr_t)&proc0, PVM, "scheduler", 0);
+ goto loop;
+ }
+
+ /*
+	 * we have found a swapped-out process which we would like to bring
+ * back in.
+ *
+ * XXX: this part is really bogus cuz we could deadlock on memory
+ * despite our feeble check
+ */
+ if (uvmexp.free > atop(USPACE)) {
+#ifdef DEBUG
+ if (swapdebug & SDB_SWAPIN)
+ printf("swapin: pid %d(%s)@%p, pri %d free %d\n",
+ p->p_pid, p->p_comm, p->p_addr, ppri, uvmexp.free);
+#endif
+ uvm_swapin(p);
+ goto loop;
+ }
+ /*
+ * not enough memory, jab the pageout daemon and wait til the coast
+ * is clear
+ */
+#ifdef DEBUG
+ if (swapdebug & SDB_FOLLOW)
+ printf("scheduler: no room for pid %d(%s), free %d\n",
+ p->p_pid, p->p_comm, uvmexp.free);
+#endif
+ printf("scheduler: no room for pid %d(%s), free %d\n",
+ p->p_pid, p->p_comm, uvmexp.free);/*XXXCDC: HIGHLY BOGUS */
+ (void) splhigh();
+ uvm_wait("schedpwait");
+ (void) spl0();
+#ifdef DEBUG
+ if (swapdebug & SDB_FOLLOW)
+ printf("scheduler: room again, free %d\n", uvmexp.free);
+#endif
+ goto loop;
+}
+
+/*
+ * swappable: is process "p" swappable?
+ */
+
+#define swappable(p) \
+ (((p)->p_flag & (P_SYSTEM | P_INMEM | P_WEXIT)) == P_INMEM && \
+ (p)->p_holdcnt == 0)
+
+/*
+ * swapout_threads: find threads that can be swapped and unwire their
+ * u-areas.
+ *
+ * - called by the pagedaemon
+ * - try to swap out at least one process
+ * - processes that are sleeping or stopped for maxslp or more seconds
+ * are swapped... otherwise the longest-sleeping or stopped process
+ * is swapped, otherwise the longest resident process...
+ */
+void
+uvm_swapout_threads()
+{
+ register struct proc *p;
+ struct proc *outp, *outp2;
+ int outpri, outpri2;
+ int didswap = 0;
+ extern int maxslp;
+ /* XXXCDC: should move off to uvmexp. or uvm., also in uvm_meter */
+
+#ifdef DEBUG
+ if (!enableswap)
+ return;
+#endif
+
+ /*
+ * outp/outpri : stop/sleep process with largest sleeptime < maxslp
+ * outp2/outpri2: the longest resident process (its swap time)
+ */
+ outp = outp2 = NULL;
+ outpri = outpri2 = 0;
+ for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
+ if (!swappable(p))
+ continue;
+ switch (p->p_stat) {
+ case SRUN:
+ if (p->p_swtime > outpri2) {
+ outp2 = p;
+ outpri2 = p->p_swtime;
+ }
+ continue;
+
+ case SSLEEP:
+ case SSTOP:
+ if (p->p_slptime >= maxslp) {
+ uvm_swapout(p); /* zap! */
+ didswap++;
+ } else if (p->p_slptime > outpri) {
+ outp = p;
+ outpri = p->p_slptime;
+ }
+ continue;
+ }
+ }
+
+ /*
+ * If we didn't get rid of any real duds, toss out the next most
+ * likely sleeping/stopped or running candidate. We only do this
+	 * if we are really low on memory since we don't gain much by doing
+ * it (USPACE bytes).
+ */
+ if (didswap == 0 && uvmexp.free <= atop(round_page(USPACE))) {
+ if ((p = outp) == NULL)
+ p = outp2;
+#ifdef DEBUG
+ if (swapdebug & SDB_SWAPOUT)
+ printf("swapout_threads: no duds, try procp %p\n", p);
+#endif
+ if (p)
+ uvm_swapout(p);
+ }
+}
+
+/*
+ * uvm_swapout: swap out process "p"
+ *
+ * - currently "swapout" means "unwire U-area" and "pmap_collect()"
+ * the pmap.
+ * - XXXCDC: should deactivate all process' private anonymous memory
+ */
+
+static void
+uvm_swapout(p)
+ register struct proc *p;
+{
+ vaddr_t addr;
+ int s;
+
+#ifdef DEBUG
+ if (swapdebug & SDB_SWAPOUT)
+ printf("swapout: pid %d(%s)@%p, stat %x pri %d free %d\n",
+ p->p_pid, p->p_comm, p->p_addr, p->p_stat,
+ p->p_slptime, uvmexp.free);
+#endif
+
+ /*
+ * Do any machine-specific actions necessary before swapout.
+ * This can include saving floating point state, etc.
+ */
+ cpu_swapout(p);
+
+ /*
+ * Unwire the to-be-swapped process's user struct and kernel stack.
+ */
+ addr = (vaddr_t)p->p_addr;
+ uvm_fault_unwire(kernel_map->pmap, addr, addr + USPACE); /* !P_INMEM */
+ pmap_collect(vm_map_pmap(&p->p_vmspace->vm_map));
+
+ /*
+ * Mark it as (potentially) swapped out.
+ */
+ s = splstatclock();
+ p->p_flag &= ~P_INMEM;
+ if (p->p_stat == SRUN)
+ remrunqueue(p);
+ splx(s);
+ p->p_swtime = 0;
+ ++uvmexp.swapouts;
+}
+
diff --git a/sys/uvm/uvm_glue.h b/sys/uvm/uvm_glue.h
new file mode 100644
index 00000000000..8a137800fcd
--- /dev/null
+++ b/sys/uvm/uvm_glue.h
@@ -0,0 +1,50 @@
+/* $NetBSD: uvm_glue.h,v 1.4 1998/02/10 02:34:37 perry Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ *
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * from: Id: uvm_glue.h,v 1.1.2.1 1997/08/14 19:10:48 chuck Exp
+ */
+
+#ifndef _UVM_UVM_GLUE_H_
+#define _UVM_UVM_GLUE_H_
+
+/*
+ * uvm_glue.h
+ */
+
+void uvm_swapout_threads __P((void));
+
+#endif /* _UVM_UVM_GLUE_H_ */
diff --git a/sys/uvm/uvm_init.c b/sys/uvm/uvm_init.c
new file mode 100644
index 00000000000..95406c95b0c
--- /dev/null
+++ b/sys/uvm/uvm_init.c
@@ -0,0 +1,167 @@
+/* $NetBSD: uvm_init.c,v 1.10 1999/01/24 23:53:15 chuck Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ *
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * from: Id: uvm_init.c,v 1.1.2.3 1998/02/06 05:15:27 chs Exp
+ */
+
+/*
+ * uvm_init.c: init the vm system.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/resourcevar.h>
+#include <sys/mman.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/vnode.h>
+#include <sys/conf.h>
+
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_kern.h>
+
+#include <uvm/uvm.h>
+
+/*
+ * struct uvm: we store all global vars in this structure to make them
+ * easier to spot...
+ */
+
+struct uvm uvm; /* decl */
+struct uvmexp uvmexp; /* decl */
+
+/*
+ * local prototypes
+ */
+
+/*
+ * uvm_init: init the VM system. called from kern/init_main.c.
+ */
+
+void
+uvm_init()
+{
+ vaddr_t kvm_start, kvm_end;
+
+ /*
+ * step 0: ensure that the hardware set the page size
+ */
+
+ if (uvmexp.pagesize == 0) {
+ panic("uvm_init: page size not set");
+ }
+
+ /*
+ * step 1: zero the uvm structure
+ */
+
+ bzero(&uvm, sizeof(uvm));
+ averunnable.fscale = FSCALE;
+
+ /*
+ * step 2: init the page sub-system. this includes allocating the
+ * vm_page structures, and setting up all the page queues (and
+ * locks). available memory will be put in the "free" queue.
+ * kvm_start and kvm_end will be set to the area of kernel virtual
+ * memory which is available for general use.
+ */
+
+ uvm_page_init(&kvm_start, &kvm_end);
+
+ /*
+ * step 3: init the map sub-system. allocates the static pool of
+ * vm_map_entry structures that are used for "special" kernel maps
+ * (e.g. kernel_map, kmem_map, etc...).
+ */
+
+ uvm_map_init();
+
+ /*
+ * step 4: setup the kernel's virtual memory data structures. this
+ * includes setting up the kernel_map/kernel_object and the kmem_map/
+ * kmem_object.
+ */
+
+ uvm_km_init(kvm_start, kvm_end);
+
+ /*
+ * step 5: init the pmap module. the pmap module is free to allocate
+ * memory for its private use (e.g. pvlists).
+ */
+
+ pmap_init();
+
+ /*
+ * step 6: init the kernel memory allocator. after this call the
+ * kernel memory allocator (malloc) can be used.
+ */
+
+ kmeminit();
+
+ /*
+ * step 7: init all pagers and the pager_map.
+ */
+
+ uvm_pager_init();
+
+ /*
+ * step 8: init anonymous memory systems (both amap and anons)
+ */
+
+ amap_init(); /* init amap module */
+ uvm_anon_init(); /* allocate initial anons */
+
+ /*
+ * the VM system is now up! now that malloc is up we can resize the
+ * <obj,off> => <page> hash table for general use and enable paging
+ * of kernel objects.
+ */
+
+ uvm_page_rehash();
+ uao_create(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS,
+ UAO_FLAG_KERNSWAP);
+
+ /*
+ * done!
+ */
+
+ return;
+}
diff --git a/sys/uvm/uvm_io.c b/sys/uvm/uvm_io.c
new file mode 100644
index 00000000000..603e04b26d9
--- /dev/null
+++ b/sys/uvm/uvm_io.c
@@ -0,0 +1,163 @@
+/* $NetBSD: uvm_io.c,v 1.7 1998/10/11 23:18:20 chuck Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ *
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * from: Id: uvm_io.c,v 1.1.2.2 1997/12/30 12:02:00 mrg Exp
+ */
+
+/*
+ * uvm_io.c: uvm i/o ops
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mman.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/uio.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_kern.h>
+
+#include <uvm/uvm.h>
+
+/*
+ * functions
+ */
+
+/*
+ * uvm_io: perform I/O on a map
+ *
+ * => caller must have a reference to "map" so that it doesn't go away
+ * while we are working.
+ */
+
+int
+uvm_io(map, uio)
+ vm_map_t map;
+ struct uio *uio;
+{
+ vaddr_t baseva, endva, pageoffset, kva;
+ vsize_t chunksz, togo, sz;
+ vm_map_entry_t dead_entries;
+ int error;
+
+ /*
+ * step 0: sanity checks and set up for copy loop. start with a
+ * large chunk size. if we have trouble finding vm space we will
+ * reduce it.
+ */
+
+ if (uio->uio_resid == 0)
+ return(0);
+ togo = uio->uio_resid;
+
+ baseva = (vaddr_t) uio->uio_offset;
+ endva = baseva + (togo - 1);
+
+ if (endva < baseva) /* wrap around? */
+ return(EIO);
+
+ if (baseva >= VM_MAXUSER_ADDRESS)
+ return(0);
+ if (endva >= VM_MAXUSER_ADDRESS)
+ /* EOF truncate */
+ togo = togo - (endva - VM_MAXUSER_ADDRESS + 1);
+ pageoffset = baseva & PAGE_MASK;
+ baseva = trunc_page(baseva);
+ chunksz = min(round_page(togo + pageoffset), MAXBSIZE);
+ error = 0;
+
+ /*
+ * step 1: main loop... while we've got data to move
+ */
+
+ for (/*null*/; togo > 0 ; pageoffset = 0) {
+
+ /*
+ * step 2: extract mappings from the map into kernel_map
+ */
+
+ error = uvm_map_extract(map, baseva, chunksz, kernel_map, &kva,
+ UVM_EXTRACT_QREF | UVM_EXTRACT_CONTIG |
+ UVM_EXTRACT_FIXPROT);
+ if (error) {
+
+ /* retry with a smaller chunk... */
+ if (error == ENOMEM && chunksz > PAGE_SIZE) {
+ chunksz = trunc_page(chunksz / 2);
+ if (chunksz < PAGE_SIZE)
+ chunksz = PAGE_SIZE;
+ continue;
+ }
+
+ break;
+ }
+
+ /*
+ * step 3: move a chunk of data
+ */
+
+ sz = chunksz - pageoffset;
+ if (sz > togo)
+ sz = togo;
+ error = uiomove((caddr_t) (kva + pageoffset), sz, uio);
+ if (error)
+ break;
+ togo -= sz;
+ baseva += chunksz;
+
+
+ /*
+ * step 4: unmap the area of kernel memory
+ */
+
+ vm_map_lock(kernel_map);
+ (void)uvm_unmap_remove(kernel_map, kva, kva+chunksz,
+ &dead_entries);
+ vm_map_unlock(kernel_map);
+
+ if (dead_entries != NULL)
+ uvm_unmap_detach(dead_entries, AMAP_REFALL);
+ }
+
+ /*
+ * done
+ */
+
+ return (error);
+}
diff --git a/sys/uvm/uvm_km.c b/sys/uvm/uvm_km.c
new file mode 100644
index 00000000000..49e9e5191bc
--- /dev/null
+++ b/sys/uvm/uvm_km.c
@@ -0,0 +1,1081 @@
+/* $NetBSD: uvm_km.c,v 1.18 1998/10/18 23:49:59 chs Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * Copyright (c) 1991, 1993, The Regents of the University of California.
+ *
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor,
+ * Washington University, the University of California, Berkeley and
+ * its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_kern.c 8.3 (Berkeley) 1/12/94
+ * from: Id: uvm_km.c,v 1.1.2.14 1998/02/06 05:19:27 chs Exp
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * uvm_km.c: handle kernel memory allocation and management
+ */
+
+/*
+ * overview of kernel memory management:
+ *
+ * the kernel virtual address space is mapped by "kernel_map." kernel_map
+ * starts at VM_MIN_KERNEL_ADDRESS and goes to VM_MAX_KERNEL_ADDRESS.
+ * note that VM_MIN_KERNEL_ADDRESS is equal to vm_map_min(kernel_map).
+ *
+ * the kernel_map has several "submaps." submaps can only appear in
+ * the kernel_map (user processes can't use them). submaps "take over"
+ * the management of a sub-range of the kernel's address space. submaps
+ * are typically allocated at boot time and are never released. kernel
+ * virtual address space that is mapped by a submap is locked by the
+ * submap's lock -- not the kernel_map's lock.
+ *
+ * thus, the useful feature of submaps is that they allow us to break
+ * up the locking and protection of the kernel address space into smaller
+ * chunks.
+ *
+ * the vm system has several standard kernel submaps, including:
+ * kmem_map => contains only wired kernel memory for the kernel
+ * malloc. *** access to kmem_map must be protected
+ * by splimp() because we are allowed to call malloc()
+ * at interrupt time ***
+ * mb_map => memory for large mbufs, *** protected by splimp ***
+ * pager_map => used to map "buf" structures into kernel space
+ * exec_map => used during exec to handle exec args
+ * etc...
+ *
+ * the kernel allocates its private memory out of special uvm_objects whose
+ * reference count is set to UVM_OBJ_KERN (thus indicating that the objects
+ * are "special" and never die). all kernel objects should be thought of
+ * as large, fixed-sized, sparsely populated uvm_objects. each kernel
+ * object is equal to the size of kernel virtual address space (i.e. the
+ * value "VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS").
+ *
+ * most kernel private memory lives in kernel_object. the only exception
+ * to this is for memory that belongs to submaps that must be protected
+ * by splimp(). each of these submaps has its own private kernel
+ * object (e.g. kmem_object, mb_object).
+ *
+ * note that just because a kernel object spans the entire kernel virtual
+ * address space doesn't mean that it has to be mapped into the entire space.
+ * large chunks of a kernel object's space go unused either because
+ * that area of kernel VM is unmapped, or there is some other type of
+ * object mapped into that range (e.g. a vnode). for submaps' kernel
+ * objects, the only part of the object that can ever be populated is the
+ * offsets that are managed by the submap.
+ *
+ * note that the "offset" in a kernel object is always the kernel virtual
+ * address minus the VM_MIN_KERNEL_ADDRESS (aka vm_map_min(kernel_map)).
+ * example:
+ * suppose VM_MIN_KERNEL_ADDRESS is 0xf8000000 and the kernel does a
+ * uvm_km_alloc(kernel_map, PAGE_SIZE) [allocate 1 wired down page in the
+ * kernel map]. if uvm_km_alloc returns virtual address 0xf8235000,
+ * then that means that the page at offset 0x235000 in kernel_object is
+ * mapped at 0xf8235000.
+ *
+ * note that the offsets in kmem_object and mb_object also follow this
+ * rule. this means that the offsets for kmem_object must fall in the
+ * range of [vm_map_min(kmem_map) - vm_map_min(kernel_map)] to
+ * [vm_map_max(kmem_map) - vm_map_min(kernel_map)], so the offsets
+ * in those objects will typically not start at zero.
+ *
+ * kernel objects have one other special property: when the kernel virtual
+ * memory mapping them is unmapped, the backing memory in the object is
+ * freed right away. this is done with the uvm_km_pgremove() function.
+ * this has to be done because there is no backing store for kernel pages
+ * and no need to save them after they are no longer referenced.
+ */
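+
+/*
+ * in symbols, the offset rule above is simply:
+ *
+ *	offset in kernel_object = kva - vm_map_min(kernel_map)
+ *
+ * so, as in the example above, with VM_MIN_KERNEL_ADDRESS at 0xf8000000
+ * a page mapped at kva 0xf8235000 sits at offset 0x235000 in
+ * kernel_object.
+ */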
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_kern.h>
+
+#include <uvm/uvm.h>
+
+/*
+ * global data structures
+ */
+
+vm_map_t kernel_map = NULL;
+
+/*
+ * local functions
+ */
+
+static int uvm_km_get __P((struct uvm_object *, vaddr_t,
+ vm_page_t *, int *, int, vm_prot_t, int, int));
+/*
+ * local data structures
+ */
+
+static struct vm_map kernel_map_store;
+static struct uvm_object kmem_object_store;
+static struct uvm_object mb_object_store;
+
+static struct uvm_pagerops km_pager = {
+ NULL, /* init */
+ NULL, /* attach */
+ NULL, /* reference */
+ NULL, /* detach */
+ NULL, /* fault */
+ NULL, /* flush */
+ uvm_km_get, /* get */
+ /* ... rest are NULL */
+};
+
+/*
+ * uvm_km_get: pager get function for kernel objects
+ *
+ * => currently we do not support pageout to the swap area, so this
+ * pager is very simple. eventually we may want an anonymous
+ * object pager which will do paging.
+ * => XXXCDC: this pager should be phased out in favor of the aobj pager
+ */
+
+
+static int
+uvm_km_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags)
+ struct uvm_object *uobj;
+ vaddr_t offset;
+ struct vm_page **pps;
+ int *npagesp;
+ int centeridx, advice, flags;
+ vm_prot_t access_type;
+{
+ vaddr_t current_offset;
+ vm_page_t ptmp;
+ int lcv, gotpages, maxpages;
+ boolean_t done;
+ UVMHIST_FUNC("uvm_km_get"); UVMHIST_CALLED(maphist);
+
+ UVMHIST_LOG(maphist, "flags=%d", flags,0,0,0);
+
+ /*
+ * get number of pages
+ */
+
+ maxpages = *npagesp;
+
+ /*
+	 * step 1: handle the case where fault data structures are locked.
+ */
+
+ if (flags & PGO_LOCKED) {
+
+ /*
+ * step 1a: get pages that are already resident. only do
+ * this if the data structures are locked (i.e. the first time
+ * through).
+ */
+
+ done = TRUE; /* be optimistic */
+ gotpages = 0; /* # of pages we got so far */
+
+ for (lcv = 0, current_offset = offset ;
+ lcv < maxpages ; lcv++, current_offset += PAGE_SIZE) {
+
+ /* do we care about this page? if not, skip it */
+ if (pps[lcv] == PGO_DONTCARE)
+ continue;
+
+ /* lookup page */
+ ptmp = uvm_pagelookup(uobj, current_offset);
+
+ /* null? attempt to allocate the page */
+ if (ptmp == NULL) {
+ ptmp = uvm_pagealloc(uobj, current_offset,
+ NULL);
+ if (ptmp) {
+ /* new page */
+ ptmp->flags &= ~(PG_BUSY|PG_FAKE);
+ UVM_PAGE_OWN(ptmp, NULL);
+ uvm_pagezero(ptmp);
+ }
+ }
+
+ /*
+			 * to be useful we must get a non-busy, non-released page
+ */
+ if (ptmp == NULL ||
+ (ptmp->flags & (PG_BUSY|PG_RELEASED)) != 0) {
+ if (lcv == centeridx ||
+ (flags & PGO_ALLPAGES) != 0)
+ /* need to do a wait or I/O! */
+ done = FALSE;
+ continue;
+ }
+
+ /*
+ * useful page: busy/lock it and plug it in our
+ * result array
+ */
+
+ /* caller must un-busy this page */
+ ptmp->flags |= PG_BUSY;
+ UVM_PAGE_OWN(ptmp, "uvm_km_get1");
+ pps[lcv] = ptmp;
+ gotpages++;
+
+ } /* "for" lcv loop */
+
+ /*
+ * step 1b: now we've either done everything needed or we
+		 * need to unlock and do some waiting or I/O.
+ */
+
+ UVMHIST_LOG(maphist, "<- done (done=%d)", done, 0,0,0);
+
+ *npagesp = gotpages;
+ if (done)
+ return(VM_PAGER_OK); /* bingo! */
+ else
+ return(VM_PAGER_UNLOCK); /* EEK! Need to
+ * unlock and I/O */
+ }
+
+ /*
+ * step 2: get non-resident or busy pages.
+ * object is locked. data structures are unlocked.
+ */
+
+ for (lcv = 0, current_offset = offset ;
+ lcv < maxpages ; lcv++, current_offset += PAGE_SIZE) {
+
+ /* skip over pages we've already gotten or don't want */
+ /* skip over pages we don't _have_ to get */
+ if (pps[lcv] != NULL ||
+ (lcv != centeridx && (flags & PGO_ALLPAGES) == 0))
+ continue;
+
+ /*
+ * we have yet to locate the current page (pps[lcv]). we
+ * first look for a page that is already at the current offset.
+ * if we find a page, we check to see if it is busy or
+ * released. if that is the case, then we sleep on the page
+ * until it is no longer busy or released and repeat the
+ * lookup. if the page we found is neither busy nor
+ * released, then we busy it (so we own it) and plug it into
+ * pps[lcv]. this 'break's the following while loop and
+ * indicates we are ready to move on to the next page in the
+ * "lcv" loop above.
+ *
+ * if we exit the while loop with pps[lcv] still set to NULL,
+ * then it means that we allocated a new busy/fake/clean page
+ * ptmp in the object and we need to do I/O to fill in the
+ * data.
+ */
+
+ while (pps[lcv] == NULL) { /* top of "pps" while loop */
+
+ /* look for a current page */
+ ptmp = uvm_pagelookup(uobj, current_offset);
+
+ /* nope? allocate one now (if we can) */
+ if (ptmp == NULL) {
+
+ ptmp = uvm_pagealloc(uobj, current_offset,
+ NULL); /* alloc */
+
+ /* out of RAM? */
+ if (ptmp == NULL) {
+ simple_unlock(&uobj->vmobjlock);
+ uvm_wait("kmgetwait1");
+ simple_lock(&uobj->vmobjlock);
+ /* goto top of pps while loop */
+ continue;
+ }
+
+ /*
+ * got new page ready for I/O. break pps
+ * while loop. pps[lcv] is still NULL.
+ */
+ break;
+ }
+
+ /* page is there, see if we need to wait on it */
+ if ((ptmp->flags & (PG_BUSY|PG_RELEASED)) != 0) {
+ ptmp->flags |= PG_WANTED;
+ UVM_UNLOCK_AND_WAIT(ptmp,&uobj->vmobjlock, 0,
+ "uvn_get",0);
+ simple_lock(&uobj->vmobjlock);
+ continue; /* goto top of pps while loop */
+ }
+
+ /*
+ * if we get here then the page has become resident
+ * and unbusy between steps 1 and 2. we busy it now
+ * (so we own it) and set pps[lcv] (so that we exit
+ * the while loop). caller must un-busy.
+ */
+ ptmp->flags |= PG_BUSY;
+ UVM_PAGE_OWN(ptmp, "uvm_km_get2");
+ pps[lcv] = ptmp;
+ }
+
+ /*
+ * if we own a valid page at the correct offset, pps[lcv]
+ * will point to it. nothing more to do except go to the
+ * next page.
+ */
+
+ if (pps[lcv])
+ continue; /* next lcv */
+
+ /*
+ * we have a "fake/busy/clean" page that we just allocated.
+ * do the needed "i/o" (in this case that means zero it).
+ */
+
+ uvm_pagezero(ptmp);
+ ptmp->flags &= ~(PG_FAKE);
+ pps[lcv] = ptmp;
+
+ } /* lcv loop */
+
+ /*
+ * finally, unlock object and return.
+ */
+
+ simple_unlock(&uobj->vmobjlock);
+ UVMHIST_LOG(maphist, "<- done (OK)",0,0,0,0);
+ return(VM_PAGER_OK);
+}
+
+/*
+ * uvm_km_init: init kernel maps and objects to reflect reality (i.e.
+ * KVM already allocated for text, data, bss, and static data structures).
+ *
+ * => KVM is defined by VM_MIN_KERNEL_ADDRESS/VM_MAX_KERNEL_ADDRESS.
+ * we assume that [min -> start] has already been allocated and that
+ * "end" is the end.
+ */
+
+void
+uvm_km_init(start, end)
+ vaddr_t start, end;
+{
+ vaddr_t base = VM_MIN_KERNEL_ADDRESS;
+
+ /*
+ * first, init kernel memory objects.
+ */
+
+ /* kernel_object: for pageable anonymous kernel memory */
+ uvm.kernel_object = uao_create(VM_MAX_KERNEL_ADDRESS -
+ VM_MIN_KERNEL_ADDRESS, UAO_FLAG_KERNOBJ);
+
+ /* kmem_object: for malloc'd memory (wired, protected by splimp) */
+ simple_lock_init(&kmem_object_store.vmobjlock);
+ kmem_object_store.pgops = &km_pager;
+ TAILQ_INIT(&kmem_object_store.memq);
+ kmem_object_store.uo_npages = 0;
+ /* we are special. we never die */
+ kmem_object_store.uo_refs = UVM_OBJ_KERN;
+ uvmexp.kmem_object = &kmem_object_store;
+
+ /* mb_object: for mbuf memory (always wired, protected by splimp) */
+ simple_lock_init(&mb_object_store.vmobjlock);
+ mb_object_store.pgops = &km_pager;
+ TAILQ_INIT(&mb_object_store.memq);
+ mb_object_store.uo_npages = 0;
+ /* we are special. we never die */
+ mb_object_store.uo_refs = UVM_OBJ_KERN;
+ uvmexp.mb_object = &mb_object_store;
+
+ /*
+ * init the map and reserve already allocated kernel space
+ * before installing.
+ */
+
+ uvm_map_setup(&kernel_map_store, base, end, FALSE);
+ kernel_map_store.pmap = pmap_kernel();
+ if (uvm_map(&kernel_map_store, &base, start - base, NULL,
+ UVM_UNKNOWN_OFFSET, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL,
+ UVM_INH_NONE, UVM_ADV_RANDOM,UVM_FLAG_FIXED)) != KERN_SUCCESS)
+ panic("uvm_km_init: could not reserve space for kernel");
+
+ /*
+ * install!
+ */
+
+ kernel_map = &kernel_map_store;
+}
+
+/*
+ * uvm_km_suballoc: allocate a submap in the kernel map. once a submap
+ * is allocated all references to that area of VM must go through it. this
+ * allows the locking of VAs in kernel_map to be broken up into regions.
+ *
+ * => if `fixed' is true, *min specifies where the region described
+ * by the submap must start
+ * => if submap is non NULL we use that as the submap, otherwise we
+ * alloc a new map
+ */
+struct vm_map *
+uvm_km_suballoc(map, min, max, size, pageable, fixed, submap)
+ struct vm_map *map;
+ vaddr_t *min, *max; /* OUT, OUT */
+ vsize_t size;
+ boolean_t pageable;
+ boolean_t fixed;
+ struct vm_map *submap;
+{
+ int mapflags = UVM_FLAG_NOMERGE | (fixed ? UVM_FLAG_FIXED : 0);
+
+ size = round_page(size); /* round up to pagesize */
+
+ /*
+ * first allocate a blank spot in the parent map
+ */
+
+ if (uvm_map(map, min, size, NULL, UVM_UNKNOWN_OFFSET,
+ UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE,
+ UVM_ADV_RANDOM, mapflags)) != KERN_SUCCESS) {
+ panic("uvm_km_suballoc: unable to allocate space in parent map");
+ }
+
+ /*
+ * set VM bounds (min is filled in by uvm_map)
+ */
+
+ *max = *min + size;
+
+ /*
+ * add references to pmap and create or init the submap
+ */
+
+ pmap_reference(vm_map_pmap(map));
+ if (submap == NULL) {
+ submap = uvm_map_create(vm_map_pmap(map), *min, *max, pageable);
+ if (submap == NULL)
+ panic("uvm_km_suballoc: unable to create submap");
+ } else {
+ uvm_map_setup(submap, *min, *max, pageable);
+ submap->pmap = vm_map_pmap(map);
+ }
+
+ /*
+ * now let uvm_map_submap plug it in...
+ */
+
+ if (uvm_map_submap(map, *min, *max, submap) != KERN_SUCCESS)
+ panic("uvm_km_suballoc: submap allocation failed");
+
+ return(submap);
+}
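+
+/*
+ * Illustrative sketch (not part of the original import): a subsystem
+ * could carve a private, pageable 1MB submap out of kernel_map as
+ * shown below.  The "example_*" names are hypothetical.
+ */
+
+static struct vm_map *example_submap;
+
+static void
+example_submap_init()
+{
+        vaddr_t minva, maxva;
+
+        example_submap = uvm_km_suballoc(kernel_map, &minva, &maxva,
+            1024 * 1024, TRUE, FALSE, NULL);
+        /* example_submap now manages the VA range [minva, maxva) */
+}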
+
+/*
+ * uvm_km_pgremove: remove pages from a kernel uvm_object.
+ *
+ * => when you unmap a part of anonymous kernel memory you want to toss
+ * the pages right away. (this gets called from uvm_unmap_...).
+ */
+
+#define UKM_HASH_PENALTY 4 /* a guess */
+
+void
+uvm_km_pgremove(uobj, start, end)
+ struct uvm_object *uobj;
+ vaddr_t start, end;
+{
+ boolean_t by_list, is_aobj;
+ struct vm_page *pp, *ppnext;
+ vaddr_t curoff;
+ UVMHIST_FUNC("uvm_km_pgremove"); UVMHIST_CALLED(maphist);
+
+ simple_lock(&uobj->vmobjlock); /* lock object */
+
+ /* is uobj an aobj? */
+ is_aobj = uobj->pgops == &aobj_pager;
+
+ /* choose cheapest traversal */
+ by_list = (uobj->uo_npages <=
+ ((end - start) >> PAGE_SHIFT) * UKM_HASH_PENALTY);
+
+ if (by_list)
+ goto loop_by_list;
+
+ /* by hash */
+
+ for (curoff = start ; curoff < end ; curoff += PAGE_SIZE) {
+ pp = uvm_pagelookup(uobj, curoff);
+ if (pp == NULL)
+ continue;
+
+ UVMHIST_LOG(maphist," page 0x%x, busy=%d", pp,
+ pp->flags & PG_BUSY, 0, 0);
+ /* now do the actual work */
+ if (pp->flags & PG_BUSY)
+ /* owner must check for this when done */
+ pp->flags |= PG_RELEASED;
+ else {
+ pmap_page_protect(PMAP_PGARG(pp), VM_PROT_NONE);
+
+ /*
+ * if this kernel object is an aobj, free the swap slot.
+ */
+ if (is_aobj) {
+ int slot = uao_set_swslot(uobj,
+ curoff >> PAGE_SHIFT,
+ 0);
+
+ if (slot)
+ uvm_swap_free(slot, 1);
+ }
+
+ uvm_lock_pageq();
+ uvm_pagefree(pp);
+ uvm_unlock_pageq();
+ }
+ /* done */
+
+ }
+ simple_unlock(&uobj->vmobjlock);
+ return;
+
+loop_by_list:
+
+ for (pp = uobj->memq.tqh_first ; pp != NULL ; pp = ppnext) {
+
+ ppnext = pp->listq.tqe_next;
+ if (pp->offset < start || pp->offset >= end) {
+ continue;
+ }
+
+ UVMHIST_LOG(maphist," page 0x%x, busy=%d", pp,
+ pp->flags & PG_BUSY, 0, 0);
+ /* now do the actual work */
+ if (pp->flags & PG_BUSY)
+ /* owner must check for this when done */
+ pp->flags |= PG_RELEASED;
+ else {
+ pmap_page_protect(PMAP_PGARG(pp), VM_PROT_NONE);
+
+ /*
+ * if this kernel object is an aobj, free the swap slot.
+ */
+ if (is_aobj) {
+ int slot = uao_set_swslot(uobj,
+ pp->offset >> PAGE_SHIFT, 0);
+
+ if (slot)
+ uvm_swap_free(slot, 1);
+ }
+
+ uvm_lock_pageq();
+ uvm_pagefree(pp);
+ uvm_unlock_pageq();
+ }
+ /* done */
+
+ }
+ simple_unlock(&uobj->vmobjlock);
+ return;
+}
+
+
+/*
+ * uvm_km_kmemalloc: lower level kernel memory allocator for malloc()
+ *
+ * => we map wired memory into the specified map using the obj passed in
+ * => NOTE: we can return 0 even if we can wait, if there is not enough
+ * free VM space in the map... caller should be prepared to handle
+ * this case.
+ * => we return KVA of memory allocated
+ * => flags: NOWAIT, VALLOC - just allocate VA, TRYLOCK - fail if we can't
+ * lock the map
+ */
+
+vaddr_t
+uvm_km_kmemalloc(map, obj, size, flags)
+ vm_map_t map;
+ struct uvm_object *obj;
+ vsize_t size;
+ int flags;
+{
+ vaddr_t kva, loopva;
+ vaddr_t offset;
+ struct vm_page *pg;
+ UVMHIST_FUNC("uvm_km_kmemalloc"); UVMHIST_CALLED(maphist);
+
+
+ UVMHIST_LOG(maphist," (map=0x%x, obj=0x%x, size=0x%x, flags=%d)",
+ map, obj, size, flags);
+#ifdef DIAGNOSTIC
+ /* sanity check */
+ if (vm_map_pmap(map) != pmap_kernel())
+ panic("uvm_km_kmemalloc: invalid map");
+#endif
+
+ /*
+ * setup for call
+ */
+
+ size = round_page(size);
+ kva = vm_map_min(map); /* hint */
+
+ /*
+ * allocate some virtual space
+ */
+
+ if (uvm_map(map, &kva, size, obj, UVM_UNKNOWN_OFFSET,
+ UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE,
+ UVM_ADV_RANDOM, (flags & UVM_KMF_TRYLOCK)))
+ != KERN_SUCCESS) {
+ UVMHIST_LOG(maphist, "<- done (no VM)",0,0,0,0);
+ return(0);
+ }
+
+ /*
+ * if all we wanted was VA, return now
+ */
+
+ if (flags & UVM_KMF_VALLOC) {
+ UVMHIST_LOG(maphist,"<- done valloc (kva=0x%x)", kva,0,0,0);
+ return(kva);
+ }
+ /*
+ * recover object offset from virtual address
+ */
+
+ offset = kva - vm_map_min(kernel_map);
+ UVMHIST_LOG(maphist, " kva=0x%x, offset=0x%x", kva, offset,0,0);
+
+ /*
+ * now allocate and map in the memory... note that we are the only ones
+ * who should ever get a handle on this area of VM.
+ */
+
+ loopva = kva;
+ while (size) {
+ simple_lock(&obj->vmobjlock);
+ pg = uvm_pagealloc(obj, offset, NULL);
+ if (pg) {
+ pg->flags &= ~PG_BUSY; /* new page */
+ UVM_PAGE_OWN(pg, NULL);
+ }
+ simple_unlock(&obj->vmobjlock);
+
+ /*
+ * out of memory?
+ */
+
+ if (pg == NULL) {
+ if (flags & UVM_KMF_NOWAIT) {
+ /* free everything! */
+ uvm_unmap(map, kva, kva + size);
+ return(0);
+ } else {
+ uvm_wait("km_getwait2"); /* sleep here */
+ continue;
+ }
+ }
+
+ /*
+ * map it in: note that we call pmap_enter with the map and
+ * object unlocked in case we are kmem_map/kmem_object
+ * (because if pmap_enter wants to allocate out of kmem_object
+ * it will need to lock it itself!)
+ */
+#if defined(PMAP_NEW)
+ pmap_kenter_pa(loopva, VM_PAGE_TO_PHYS(pg), VM_PROT_ALL);
+#else
+ pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg),
+ UVM_PROT_ALL, TRUE);
+#endif
+ loopva += PAGE_SIZE;
+ offset += PAGE_SIZE;
+ size -= PAGE_SIZE;
+ }
+
+ UVMHIST_LOG(maphist,"<- done (kva=0x%x)", kva,0,0,0);
+ return(kva);
+}
+
+/*
+ * uvm_km_free: free an area of kernel memory
+ */
+
+void
+uvm_km_free(map, addr, size)
+ vm_map_t map;
+ vaddr_t addr;
+ vsize_t size;
+{
+
+ uvm_unmap(map, trunc_page(addr), round_page(addr+size));
+}
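+
+/*
+ * Illustrative sketch (not part of the original import): allocate one
+ * page of wired kernel memory backed by kmem_object (roughly what
+ * malloc() does underneath) and free it again.  It assumes kmem_map
+ * has been set up elsewhere; the "example_*" helpers are hypothetical.
+ */
+
+static vaddr_t
+example_wired_page_get()
+{
+
+        /* returns 0 if no VA or no physical memory is available now */
+        return(uvm_km_kmemalloc(kmem_map, uvmexp.kmem_object, PAGE_SIZE,
+            UVM_KMF_NOWAIT));
+}
+
+static void
+example_wired_page_put(va)
+        vaddr_t va;
+{
+
+        uvm_km_free(kmem_map, va, PAGE_SIZE);
+}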
+
+/*
+ * uvm_km_free_wakeup: free an area of kernel memory and wake up
+ * anyone waiting for vm space.
+ *
+ * => XXX: "wanted" bit + unlock&wait on other end?
+ */
+
+void
+uvm_km_free_wakeup(map, addr, size)
+ vm_map_t map;
+ vaddr_t addr;
+ vsize_t size;
+{
+ vm_map_entry_t dead_entries;
+
+ vm_map_lock(map);
+ (void)uvm_unmap_remove(map, trunc_page(addr), round_page(addr+size),
+ &dead_entries);
+ thread_wakeup(map);
+ vm_map_unlock(map);
+
+ if (dead_entries != NULL)
+ uvm_unmap_detach(dead_entries, 0);
+}
+
+/*
+ * uvm_km_alloc1: allocate wired down memory in the kernel map.
+ *
+ * => we can sleep if needed
+ */
+
+vaddr_t
+uvm_km_alloc1(map, size, zeroit)
+ vm_map_t map;
+ vsize_t size;
+ boolean_t zeroit;
+{
+ vaddr_t kva, loopva, offset;
+ struct vm_page *pg;
+ UVMHIST_FUNC("uvm_km_alloc1"); UVMHIST_CALLED(maphist);
+
+ UVMHIST_LOG(maphist,"(map=0x%x, size=0x%x)", map, size,0,0);
+
+#ifdef DIAGNOSTIC
+ if (vm_map_pmap(map) != pmap_kernel())
+ panic("uvm_km_alloc1");
+#endif
+
+ size = round_page(size);
+ kva = vm_map_min(map); /* hint */
+
+ /*
+ * allocate some virtual space
+ */
+
+ if (uvm_map(map, &kva, size, uvm.kernel_object, UVM_UNKNOWN_OFFSET,
+ UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE,
+ UVM_ADV_RANDOM, 0)) != KERN_SUCCESS) {
+ UVMHIST_LOG(maphist,"<- done (no VM)",0,0,0,0);
+ return(0);
+ }
+
+ /*
+ * recover object offset from virtual address
+ */
+
+ offset = kva - vm_map_min(kernel_map);
+ UVMHIST_LOG(maphist," kva=0x%x, offset=0x%x", kva, offset,0,0);
+
+ /*
+ * now allocate the memory. we must be careful about released pages.
+ */
+
+ loopva = kva;
+ while (size) {
+ simple_lock(&uvm.kernel_object->vmobjlock);
+ pg = uvm_pagelookup(uvm.kernel_object, offset);
+
+ /*
+ * if we found a page in an unallocated region, it must be
+ * released
+ */
+ if (pg) {
+ if ((pg->flags & PG_RELEASED) == 0)
+ panic("uvm_km_alloc1: non-released page");
+ pg->flags |= PG_WANTED;
+ UVM_UNLOCK_AND_WAIT(pg, &uvm.kernel_object->vmobjlock,
+ 0, "km_alloc", 0);
+ continue; /* retry */
+ }
+
+ /* allocate ram */
+ pg = uvm_pagealloc(uvm.kernel_object, offset, NULL);
+ if (pg) {
+ pg->flags &= ~PG_BUSY; /* new page */
+ UVM_PAGE_OWN(pg, NULL);
+ }
+ simple_unlock(&uvm.kernel_object->vmobjlock);
+ if (pg == NULL) {
+ uvm_wait("km_alloc1w"); /* wait for memory */
+ continue;
+ }
+
+ /* map it in */
+#if defined(PMAP_NEW)
+ pmap_kenter_pa(loopva, VM_PAGE_TO_PHYS(pg), UVM_PROT_ALL);
+#else
+ pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg),
+ UVM_PROT_ALL, TRUE);
+#endif
+ loopva += PAGE_SIZE;
+ offset += PAGE_SIZE;
+ size -= PAGE_SIZE;
+ }
+
+ /*
+ * zero on request (note that "size" is now zero due to the above loop
+ * so we need to subtract kva from loopva to reconstruct the size).
+ */
+
+ if (zeroit)
+ bzero((caddr_t)kva, loopva - kva);
+
+ UVMHIST_LOG(maphist,"<- done (kva=0x%x)", kva,0,0,0);
+ return(kva);
+}
+
+/*
+ * uvm_km_valloc: allocate zero-fill memory in the kernel's address space
+ *
+ * => memory is not allocated until fault time
+ */
+
+vaddr_t
+uvm_km_valloc(map, size)
+ vm_map_t map;
+ vsize_t size;
+{
+ vaddr_t kva;
+ UVMHIST_FUNC("uvm_km_valloc"); UVMHIST_CALLED(maphist);
+
+ UVMHIST_LOG(maphist, "(map=0x%x, size=0x%x)", map, size, 0,0);
+
+#ifdef DIAGNOSTIC
+ if (vm_map_pmap(map) != pmap_kernel())
+ panic("uvm_km_valloc");
+#endif
+
+ size = round_page(size);
+ kva = vm_map_min(map); /* hint */
+
+ /*
+ * allocate some virtual space. will be demand filled by kernel_object.
+ */
+
+ if (uvm_map(map, &kva, size, uvm.kernel_object, UVM_UNKNOWN_OFFSET,
+ UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE,
+ UVM_ADV_RANDOM, 0)) != KERN_SUCCESS) {
+ UVMHIST_LOG(maphist, "<- done (no VM)", 0,0,0,0);
+ return(0);
+ }
+
+ UVMHIST_LOG(maphist, "<- done (kva=0x%x)", kva,0,0,0);
+ return(kva);
+}
+
+/*
+ * uvm_km_valloc_wait: allocate zero-fill memory in the kernel's address space
+ *
+ * => memory is not allocated until fault time
+ * => if no room in map, wait for space to free, unless requested size
+ * is larger than map (in which case we return 0)
+ */
+
+vaddr_t
+uvm_km_valloc_wait(map, size)
+ vm_map_t map;
+ vsize_t size;
+{
+ vaddr_t kva;
+ UVMHIST_FUNC("uvm_km_valloc_wait"); UVMHIST_CALLED(maphist);
+
+ UVMHIST_LOG(maphist, "(map=0x%x, size=0x%x)", map, size, 0,0);
+
+#ifdef DIAGNOSTIC
+ if (vm_map_pmap(map) != pmap_kernel())
+ panic("uvm_km_valloc_wait");
+#endif
+
+ size = round_page(size);
+ if (size > vm_map_max(map) - vm_map_min(map))
+ return(0);
+
+ while (1) {
+ kva = vm_map_min(map); /* hint */
+
+ /*
+ * allocate some virtual space. will be demand filled
+ * by kernel_object.
+ */
+
+ if (uvm_map(map, &kva, size, uvm.kernel_object,
+ UVM_UNKNOWN_OFFSET, UVM_MAPFLAG(UVM_PROT_ALL,
+ UVM_PROT_ALL, UVM_INH_NONE, UVM_ADV_RANDOM, 0))
+ == KERN_SUCCESS) {
+ UVMHIST_LOG(maphist,"<- done (kva=0x%x)", kva,0,0,0);
+ return(kva);
+ }
+
+ /*
+ * failed. sleep for a while (on map)
+ */
+
+ UVMHIST_LOG(maphist,"<<<sleeping>>>",0,0,0,0);
+ tsleep((caddr_t)map, PVM, "vallocwait", 0);
+ }
+ /*NOTREACHED*/
+}
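+
+/*
+ * Illustrative sketch (not part of the original import): reserve a
+ * pageable, demand-zero-filled buffer in kernel_map (sleeping until VA
+ * space is available) and release it with uvm_km_free().  The
+ * "example_*" helpers are hypothetical.
+ */
+
+static vaddr_t
+example_lazy_buffer_get(len)
+        vsize_t len;
+{
+
+        /* backing pages are only allocated when the buffer is touched */
+        return(uvm_km_valloc_wait(kernel_map, round_page(len)));
+}
+
+static void
+example_lazy_buffer_put(va, len)
+        vaddr_t va;
+        vsize_t len;
+{
+
+        uvm_km_free(kernel_map, va, round_page(len));
+}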
+
+/* Sanity; must specify both or none. */
+#if (defined(PMAP_MAP_POOLPAGE) || defined(PMAP_UNMAP_POOLPAGE)) && \
+ (!defined(PMAP_MAP_POOLPAGE) || !defined(PMAP_UNMAP_POOLPAGE))
+#error Must specify MAP and UNMAP together.
+#endif
+
+/*
+ * uvm_km_alloc_poolpage: allocate a page for the pool allocator
+ *
+ * => if the pmap specifies an alternate mapping method, we use it.
+ */
+
+/* ARGSUSED */
+vaddr_t
+uvm_km_alloc_poolpage1(map, obj, waitok)
+ vm_map_t map;
+ struct uvm_object *obj;
+ boolean_t waitok;
+{
+#if defined(PMAP_MAP_POOLPAGE)
+ struct vm_page *pg;
+ vaddr_t va;
+
+ again:
+ pg = uvm_pagealloc(NULL, 0, NULL);
+ if (pg == NULL) {
+ if (waitok) {
+ uvm_wait("plpg");
+ goto again;
+ } else
+ return (0);
+ }
+ va = PMAP_MAP_POOLPAGE(VM_PAGE_TO_PHYS(pg));
+ if (va == 0)
+ uvm_pagefree(pg);
+ return (va);
+#else
+ vaddr_t va;
+ int s;
+
+ /*
+ * NOTE: We may be called with a map that doesn't require splimp
+ * protection (e.g. kernel_map). However, it does not hurt to
+ * go to splimp in this case (since unprotected maps will never be
+ * accessed in interrupt context).
+ *
+ * XXX We may want to consider changing the interface to this
+ * XXX function.
+ */
+
+ s = splimp();
+ va = uvm_km_kmemalloc(map, obj, PAGE_SIZE, waitok ? 0 : UVM_KMF_NOWAIT);
+ splx(s);
+ return (va);
+#endif /* PMAP_MAP_POOLPAGE */
+}
+
+/*
+ * uvm_km_free_poolpage: free a previously allocated pool page
+ *
+ * => if the pmap specifies an alternate unmapping method, we use it.
+ */
+
+/* ARGSUSED */
+void
+uvm_km_free_poolpage1(map, addr)
+ vm_map_t map;
+ vaddr_t addr;
+{
+#if defined(PMAP_UNMAP_POOLPAGE)
+ paddr_t pa;
+
+ pa = PMAP_UNMAP_POOLPAGE(addr);
+ uvm_pagefree(PHYS_TO_VM_PAGE(pa));
+#else
+ int s;
+
+ /*
+ * NOTE: We may be called with a map that doesn't require splimp
+ * protection (e.g. kernel_map). However, it does not hurt to
+ * go to splimp in this case (since unprotected maps will never be
+ * accessed in interrupt context).
+ *
+ * XXX We may want to consider changing the interface to this
+ * XXX function.
+ */
+
+ s = splimp();
+ uvm_km_free(map, addr, PAGE_SIZE);
+ splx(s);
+#endif /* PMAP_UNMAP_POOLPAGE */
+}
diff --git a/sys/uvm/uvm_km.h b/sys/uvm/uvm_km.h
new file mode 100644
index 00000000000..ba941255020
--- /dev/null
+++ b/sys/uvm/uvm_km.h
@@ -0,0 +1,55 @@
+/* $NetBSD: uvm_km.h,v 1.6 1998/08/13 02:11:01 eeh Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ *
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * from: Id: uvm_km.h,v 1.1.2.2 1997/12/30 12:03:15 mrg Exp
+ */
+
+#ifndef _UVM_UVM_KM_H_
+#define _UVM_UVM_KM_H_
+
+/*
+ * uvm_km.h
+ */
+
+/*
+ * prototypes
+ */
+
+void uvm_km_init __P((vaddr_t, vaddr_t));
+void uvm_km_pgremove __P((struct uvm_object *, vaddr_t, vaddr_t));
+
+#endif /* _UVM_UVM_KM_H_ */
diff --git a/sys/uvm/uvm_loan.c b/sys/uvm/uvm_loan.c
new file mode 100644
index 00000000000..d8716b46f52
--- /dev/null
+++ b/sys/uvm/uvm_loan.c
@@ -0,0 +1,755 @@
+/* $NetBSD: uvm_loan.c,v 1.13 1999/01/24 23:53:15 chuck Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ *
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * from: Id: uvm_loan.c,v 1.1.6.4 1998/02/06 05:08:43 chs Exp
+ */
+
+/*
+ * uvm_loan.c: page loanout handler
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/mman.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_kern.h>
+
+#include <uvm/uvm.h>
+
+/*
+ * "loaned" pages are pages which are (read-only, copy-on-write) loaned
+ * from the VM system to other parts of the kernel. this allows page
+ * copying to be avoided (e.g. you can loan pages from objs/anons to
+ * the mbuf system).
+ *
+ * there are 3 types of loans possible:
+ * O->K uvm_object page to wired kernel page (e.g. mbuf data area)
+ * A->K anon page to wired kernel page (e.g. mbuf data area)
+ * O->A uvm_object to anon loan (e.g. vnode page to an anon)
+ * note that it is possible to have an O page loaned to both an A and K
+ * at the same time.
+ *
+ * loans are tracked by pg->loan_count. an O->A page will have both
+ * a uvm_object and a vm_anon, but PQ_ANON will not be set. this sort
+ * of page is considered "owned" by the uvm_object (not the anon).
+ *
+ * each loan of a page to a wired kernel page bumps the pg->wire_count.
+ * wired kernel mappings should be entered with pmap_kenter functions
+ * so that pmap_page_protect() will not affect the kernel mappings.
+ * (this requires the PMAP_NEW interface...).
+ *
+ * owners that want to free their pages and discover that they are
+ * loaned out simply "disown" them (the page becomes an orphan). these
+ * pages should be freed when the last loan is dropped. in some cases
+ * an anon may "adopt" an orphaned page.
+ *
+ * locking: to read pg->loan_count either the owner or the page queues
+ * must be locked. to modify pg->loan_count, both the owner of the page
+ * and the PQs must be locked. pg->flags is (as always) locked by
+ * the owner of the page.
+ *
+ * note that locking from the "loaned" side is tricky since the object
+ * getting the loaned page has no reference to the page's owner and thus
+ * the owner could "die" at any time. in order to prevent the owner
+ * from dying the page queues should be locked. this forces us to sometimes
+ * use "try" locking.
+ *
+ * loans are typically broken by the following events:
+ * 1. write fault to a loaned page
+ * 2. pageout of clean+inactive O->A loaned page
+ * 3. owner frees page (e.g. pager flush)
+ *
+ * note that loaning a page causes all mappings of the page to become
+ * read-only (via pmap_page_protect). this could have an unexpected
+ * effect on normal "wired" pages if one is not careful.
+ */
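+
+/*
+ * Illustrative sketch (not part of the original import): loan one page
+ * of a map into the kernel (the O->K / A->K cases above) and then drop
+ * the loan.  "example_loan_one_page" is a hypothetical helper.
+ */
+
+static int
+example_loan_one_page(map, va, pgp)
+        struct vm_map *map;
+        vaddr_t va;
+        struct vm_page **pgp;
+{
+        int error;
+
+        error = uvm_loan(map, trunc_page(va), PAGE_SIZE, (void **)pgp,
+            UVM_LOAN_TOPAGE);
+        if (error != KERN_SUCCESS)
+                return(error);
+
+        /* *pgp is now wired and read-only; e.g. usable as mbuf data */
+
+        uvm_unloanpage(pgp, 1);
+        return(KERN_SUCCESS);
+}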
+
+/*
+ * local prototypes
+ */
+
+static int uvm_loananon __P((struct uvm_faultinfo *, void ***,
+ int, struct vm_anon *));
+static int uvm_loanentry __P((struct uvm_faultinfo *, void ***, int));
+static int uvm_loanuobj __P((struct uvm_faultinfo *, void ***,
+ int, vaddr_t));
+static int uvm_loanzero __P((struct uvm_faultinfo *, void ***, int));
+
+/*
+ * inlines
+ */
+
+/*
+ * uvm_loanentry: loan out pages in a map entry (helper fn for uvm_loan())
+ *
+ * => "ufi" is the result of a successful map lookup (meaning that
+ * the maps are locked by the caller)
+ * => we may unlock the maps if needed (for I/O)
+ * => we put our output result in "output"
+ * => we return the number of pages we loaned, or -1 if we had an error
+ */
+
+static __inline int
+uvm_loanentry(ufi, output, flags)
+ struct uvm_faultinfo *ufi;
+ void ***output;
+ int flags;
+{
+ vaddr_t curaddr = ufi->orig_rvaddr;
+ vsize_t togo = ufi->size;
+ struct vm_aref *aref = &ufi->entry->aref;
+ struct uvm_object *uobj = ufi->entry->object.uvm_obj;
+ struct vm_anon *anon;
+ int rv, result = 0;
+
+ /*
+ * lock us the rest of the way down
+ */
+ if (aref->ar_amap)
+ amap_lock(aref->ar_amap);
+ if (uobj)
+ simple_lock(&uobj->vmobjlock);
+
+ /*
+ * loop until done
+ */
+ while (togo) {
+
+ /*
+ * find the page we want. check the anon layer first.
+ */
+
+ if (aref->ar_amap) {
+ anon = amap_lookup(aref, curaddr - ufi->entry->start);
+ } else {
+ anon = NULL;
+ }
+
+ if (anon) {
+ rv = uvm_loananon(ufi, output, flags, anon);
+ } else if (uobj) {
+ rv = uvm_loanuobj(ufi, output, flags, curaddr);
+ } else if (UVM_ET_ISCOPYONWRITE(ufi->entry)) {
+ rv = uvm_loanzero(ufi, output, flags);
+ } else {
+ rv = -1; /* null map entry... fail now */
+ }
+
+ /* total failure */
+ if (rv < 0)
+ return(-1);
+
+ /* relock failed, need to do another lookup */
+ if (rv == 0)
+ return(result);
+
+ /*
+ * got it... advance to next page
+ */
+ result++;
+ togo -= PAGE_SIZE;
+ curaddr += PAGE_SIZE;
+ }
+
+ /*
+ * unlock everything and return
+ */
+ uvmfault_unlockall(ufi, aref->ar_amap, uobj, NULL);
+ return(result);
+}
+
+/*
+ * normal functions
+ */
+
+/*
+ * uvm_loan: loan pages out to anons or to the kernel
+ *
+ * => map should be unlocked
+ * => start and len should be multiples of PAGE_SIZE
+ * => result is either an array of anons or vm_pages (depending on flags)
+ * => flag values: UVM_LOAN_TOANON - loan to anons
+ * UVM_LOAN_TOPAGE - loan to wired kernel page
+ * one and only one of these flags must be set!
+ */
+
+int
+uvm_loan(map, start, len, result, flags)
+ struct vm_map *map;
+ vaddr_t start;
+ vsize_t len;
+ void **result;
+ int flags;
+{
+ struct uvm_faultinfo ufi;
+ void **output;
+ int rv;
+
+ /*
+ * ensure that one and only one of the flags is set
+ */
+
+ if ((flags & (UVM_LOAN_TOANON|UVM_LOAN_TOPAGE)) ==
+ (UVM_LOAN_TOANON|UVM_LOAN_TOPAGE) ||
+ (flags & (UVM_LOAN_TOANON|UVM_LOAN_TOPAGE)) == 0)
+ return(KERN_FAILURE);
+
+ /*
+ * "output" is a pointer to the current place to put the loaned
+ * page...
+ */
+
+ output = &result[0]; /* start at the beginning ... */
+
+ /*
+ * while we've got pages to do
+ */
+
+ while (len > 0) {
+
+ /*
+ * fill in params for a call to uvmfault_lookup
+ */
+
+ ufi.orig_map = map;
+ ufi.orig_rvaddr = start;
+ ufi.orig_size = len;
+
+ /*
+ * do the lookup, the only time this will fail is if we hit on
+ * an unmapped region (an error)
+ */
+
+ if (!uvmfault_lookup(&ufi, FALSE))
+ goto fail;
+
+ /*
+ * now do the loanout
+ */
+ rv = uvm_loanentry(&ufi, &output, flags);
+ if (rv < 0)
+ goto fail;
+
+ /*
+ * done! advance pointers and unlock.
+ */
+ rv <<= PAGE_SHIFT;
+ len -= rv;
+ start += rv;
+ uvmfault_unlockmaps(&ufi, FALSE);
+ }
+
+ /*
+ * got it! return success.
+ */
+
+ return(KERN_SUCCESS);
+
+fail:
+ /*
+ * fail: failed to do it. drop our loans and return failure code.
+ */
+ if (output - result) {
+ if (flags & UVM_LOAN_TOANON)
+ uvm_unloananon((struct vm_anon **)result,
+ output - result);
+ else
+ uvm_unloanpage((struct vm_page **)result,
+ output - result);
+ }
+ return(KERN_FAILURE);
+}
+
+/*
+ * uvm_loananon: loan a page from an anon out
+ *
+ * => return value:
+ * -1 = fatal error, everything is unlocked, abort.
+ * 0 = lookup in ufi went stale, everything unlocked, relookup and
+ * try again
+ * 1 = got it, everything still locked
+ */
+
+int
+uvm_loananon(ufi, output, flags, anon)
+ struct uvm_faultinfo *ufi;
+ void ***output;
+ int flags;
+ struct vm_anon *anon;
+{
+ struct vm_page *pg;
+ int result;
+
+ /*
+ * if we are loaning to another anon then it is easy, we just
+ * bump the reference count on the current anon and return a
+ * pointer to it.
+ */
+ if (flags & UVM_LOAN_TOANON) {
+ simple_lock(&anon->an_lock);
+ pg = anon->u.an_page;
+ if (pg && (pg->pqflags & PQ_ANON) != 0 && anon->an_ref == 1)
+ /* read protect it */
+ pmap_page_protect(PMAP_PGARG(pg), VM_PROT_READ);
+ anon->an_ref++;
+ **output = anon;
+ *output = (*output) + 1;
+ simple_unlock(&anon->an_lock);
+ return(1);
+ }
+
+ /*
+ * we are loaning to a kernel-page. we need to get the page
+ * resident so we can wire it. uvmfault_anonget will handle
+ * this for us.
+ */
+
+ simple_lock(&anon->an_lock);
+ result = uvmfault_anonget(ufi, ufi->entry->aref.ar_amap, anon);
+
+ /*
+ * if we were unable to get the anon, then uvmfault_anonget has
+ * unlocked everything and returned an error code.
+ */
+
+ if (result != VM_PAGER_OK) {
+
+ /* need to refault (i.e. refresh our lookup) ? */
+ if (result == VM_PAGER_REFAULT)
+ return(0);
+
+ /* "try again"? sleep a bit and retry ... */
+ if (result == VM_PAGER_AGAIN) {
+ tsleep((caddr_t)&lbolt, PVM, "loanagain", 0);
+ return(0);
+ }
+
+ /* otherwise flag it as an error */
+ return(-1);
+ }
+
+ /*
+ * we have the page and its owner locked: do the loan now.
+ */
+
+ pg = anon->u.an_page;
+ uvm_lock_pageq();
+ if (pg->loan_count == 0)
+ pmap_page_protect(PMAP_PGARG(pg), VM_PROT_READ);
+ pg->loan_count++;
+ uvm_pagewire(pg); /* always wire it */
+ uvm_unlock_pageq();
+ **output = pg;
+ *output = (*output) + 1;
+
+ /* unlock anon and return success */
+ if (pg->uobject)
+ simple_unlock(&pg->uobject->vmobjlock);
+ simple_unlock(&anon->an_lock);
+ return(1);
+}
+
+/*
+ * uvm_loanuobj: loan a page from a uobj out
+ *
+ * => return value:
+ * -1 = fatal error, everything is unlocked, abort.
+ * 0 = lookup in ufi went stale, everything unlocked, relookup and
+ * try again
+ * 1 = got it, everything still locked
+ */
+
+int
+uvm_loanuobj(ufi, output, flags, va)
+ struct uvm_faultinfo *ufi;
+ void ***output;
+ int flags;
+ vaddr_t va;
+{
+ struct vm_amap *amap = ufi->entry->aref.ar_amap;
+ struct uvm_object *uobj = ufi->entry->object.uvm_obj;
+ struct vm_page *pg;
+ struct vm_anon *anon;
+ int result, npages;
+ boolean_t locked;
+
+ /*
+ * first we must make sure the page is resident.
+ *
+ * XXXCDC: duplicate code with uvm_fault().
+ */
+
+ if (uobj->pgops->pgo_get) {
+ npages = 1;
+ pg = NULL;
+ result = uobj->pgops->pgo_get(uobj, va - ufi->entry->start,
+ &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, PGO_LOCKED);
+ } else {
+ result = VM_PAGER_ERROR;
+ }
+
+ /*
+ * check the result of the locked pgo_get. if there is a problem,
+ * then we fail the loan.
+ */
+
+ if (result != VM_PAGER_OK && result != VM_PAGER_UNLOCK) {
+ uvmfault_unlockall(ufi, amap, uobj, NULL);
+ return(-1);
+ }
+
+ /*
+ * if we need to unlock for I/O, do so now.
+ */
+
+ if (result == VM_PAGER_UNLOCK) {
+ uvmfault_unlockall(ufi, amap, NULL, NULL);
+
+ npages = 1;
+ /* locked: uobj */
+ result = uobj->pgops->pgo_get(uobj, va - ufi->entry->start,
+ &pg, &npages, 0, VM_PROT_READ, MADV_NORMAL, 0);
+ /* locked: <nothing> */
+
+ /*
+ * check for errors
+ */
+
+ if (result != VM_PAGER_OK) {
+ if (result == VM_PAGER_AGAIN) {
+ tsleep((caddr_t)&lbolt, PVM, "fltagain2", 0);
+ return(0); /* redo the lookup and try again */
+ }
+ return(-1); /* total failure */
+ }
+
+ /*
+ * pgo_get was a success. attempt to relock everything.
+ */
+
+ locked = uvmfault_relock(ufi);
+ if (locked && amap)
+ amap_lock(amap);
+ simple_lock(&uobj->vmobjlock);
+
+ /*
+ * verify that the page has not been released and re-verify
+ * that the amap slot is still free. if there is a problem we
+ * drop our lock (thus forcing a lookup refresh/retry).
+ */
+
+ if ((pg->flags & PG_RELEASED) != 0 ||
+ (locked && amap && amap_lookup(&ufi->entry->aref,
+ ufi->orig_rvaddr - ufi->entry->start))) {
+
+ if (locked)
+ uvmfault_unlockall(ufi, amap, NULL, NULL);
+ locked = FALSE;
+ }
+
+ /*
+ * didn't get the lock? release the page and retry.
+ */
+
+ if (locked == FALSE) {
+
+ if (pg->flags & PG_WANTED)
+ /* still holding object lock */
+ thread_wakeup(pg);
+
+ if (pg->flags & PG_RELEASED) {
+#ifdef DIAGNOSTIC
+ if (uobj->pgops->pgo_releasepg == NULL)
+ panic("uvm_loanuobj: object has no releasepg function");
+#endif
+ /* frees page */
+ if (uobj->pgops->pgo_releasepg(pg, NULL))
+ simple_unlock(&uobj->vmobjlock);
+ return (0);
+ }
+
+ uvm_lock_pageq();
+ uvm_pageactivate(pg); /* make sure it is in queues */
+ uvm_unlock_pageq();
+ pg->flags &= ~(PG_BUSY|PG_WANTED);
+ UVM_PAGE_OWN(pg, NULL);
+ simple_unlock(&uobj->vmobjlock);
+ return (0);
+ }
+ }
+
+ /*
+ * at this point we have the page we want ("pg") marked PG_BUSY for us
+ * and we have all data structures locked. do the loanout. page can
+ * not be PG_RELEASED (we caught this above).
+ */
+
+ if ((flags & UVM_LOAN_TOANON) == 0) { /* loan to wired-kernel page? */
+ uvm_lock_pageq();
+ if (pg->loan_count == 0)
+ pmap_page_protect(PMAP_PGARG(pg), VM_PROT_READ);
+ pg->loan_count++;
+ uvm_pagewire(pg);
+ uvm_unlock_pageq();
+ **output = pg;
+ *output = (*output) + 1;
+ if (pg->flags & PG_WANTED)
+ thread_wakeup(pg);
+ pg->flags &= ~(PG_WANTED|PG_BUSY);
+ UVM_PAGE_OWN(pg, NULL);
+ return(1); /* got it! */
+ }
+
+ /*
+ * must be a loan to an anon. check to see if there is already
+ * an anon associated with this page. if so, then just return
+ * a reference to that anon. the page should already be
+ * mapped read-only because it is already on loan.
+ */
+
+ if (pg->uanon) {
+ anon = pg->uanon;
+ simple_lock(&anon->an_lock);
+ anon->an_ref++;
+ simple_unlock(&anon->an_lock);
+ **output = anon;
+ *output = (*output) + 1;
+ uvm_lock_pageq();
+ uvm_pageactivate(pg); /* reactivate */
+ uvm_unlock_pageq();
+ if (pg->flags & PG_WANTED)
+ thread_wakeup(pg);
+ pg->flags &= ~(PG_WANTED|PG_BUSY);
+ UVM_PAGE_OWN(pg, NULL);
+ return(1);
+ }
+
+ /*
+ * need to allocate a new anon
+ */
+
+ anon = uvm_analloc();
+ if (anon == NULL) { /* out of VM! */
+ if (pg->flags & PG_WANTED)
+ thread_wakeup(pg);
+ pg->flags &= ~(PG_WANTED|PG_BUSY);
+ UVM_PAGE_OWN(pg, NULL);
+ uvmfault_unlockall(ufi, amap, uobj, NULL);
+ return(-1);
+ }
+ anon->u.an_page = pg;
+ pg->uanon = anon;
+ uvm_lock_pageq();
+ if (pg->loan_count == 0)
+ pmap_page_protect(PMAP_PGARG(pg), VM_PROT_READ);
+ pg->loan_count++;
+ uvm_pageactivate(pg);
+ uvm_unlock_pageq();
+ **output = anon;
+ *output = (*output) + 1;
+ if (pg->flags & PG_WANTED)
+ thread_wakeup(pg);
+ pg->flags &= ~(PG_WANTED|PG_BUSY);
+ UVM_PAGE_OWN(pg, NULL);
+ return(1);
+}
+
+/*
+ * uvm_loanzero: "loan" a zero-fill page out
+ *
+ * => return value:
+ * -1 = fatal error, everything is unlocked, abort.
+ * 0 = lookup in ufi went stale, everything unlocked, relookup and
+ * try again
+ * 1 = got it, everything still locked
+ */
+
+int
+uvm_loanzero(ufi, output, flags)
+ struct uvm_faultinfo *ufi;
+ void ***output;
+ int flags;
+{
+ struct vm_anon *anon;
+ struct vm_page *pg;
+
+ if ((flags & UVM_LOAN_TOANON) == 0) { /* loaning to kernel-page */
+
+ while ((pg = uvm_pagealloc(NULL, 0, NULL)) == NULL) {
+ uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap,
+ ufi->entry->object.uvm_obj, NULL);
+ uvm_wait("loanzero1");
+ if (!uvmfault_relock(ufi))
+ return(0);
+ if (ufi->entry->aref.ar_amap)
+ amap_lock(ufi->entry->aref.ar_amap);
+ if (ufi->entry->object.uvm_obj)
+ simple_lock(
+ &ufi->entry->object.uvm_obj->vmobjlock);
+ /* ... and try again */
+ }
+
+ /* got a page, zero it and return */
+ uvm_pagezero(pg); /* clears PG_CLEAN */
+ pg->flags &= ~(PG_BUSY|PG_FAKE);
+ UVM_PAGE_OWN(pg, NULL);
+ **output = pg;
+ *output = (*output) + 1;
+ uvm_lock_pageq();
+ /* wire it as we are loaning to kernel-page */
+ uvm_pagewire(pg);
+ pg->loan_count = 1;
+ uvm_unlock_pageq();
+ return(1);
+ }
+
+ /* loaning to an anon */
+ while ((anon = uvm_analloc()) == NULL ||
+ (pg = uvm_pagealloc(NULL, 0, anon)) == NULL) {
+
+ /* unlock everything */
+ uvmfault_unlockall(ufi, ufi->entry->aref.ar_amap,
+ ufi->entry->object.uvm_obj, NULL);
+
+ /* out of swap causes us to fail */
+ if (anon == NULL)
+ return(-1);
+
+ uvm_anfree(anon);
+ uvm_wait("loanzero2"); /* wait for pagedaemon */
+
+ if (!uvmfault_relock(ufi))
+ /* map changed while unlocked, need relookup */
+ return (0);
+
+ /* relock everything else */
+ if (ufi->entry->aref.ar_amap)
+ amap_lock(ufi->entry->aref.ar_amap);
+ if (ufi->entry->object.uvm_obj)
+ simple_lock(&ufi->entry->object.uvm_obj->vmobjlock);
+ /* ... and try again */
+ }
+
+ /* got a page, zero it and return */
+ uvm_pagezero(pg); /* clears PG_CLEAN */
+ pg->flags &= ~(PG_BUSY|PG_FAKE);
+ UVM_PAGE_OWN(pg, NULL);
+ uvm_lock_pageq();
+ uvm_pageactivate(pg);
+ uvm_unlock_pageq();
+ **output = anon;
+ *output = (*output) + 1;
+ return(1);
+}
+
+
+/*
+ * uvm_unloananon: kill loans on anons (basically a normal ref drop)
+ *
+ * => we expect all our resources to be unlocked
+ */
+
+void
+uvm_unloananon(aloans, nanons)
+ struct vm_anon **aloans;
+ int nanons;
+{
+ struct vm_anon *anon;
+
+ while (nanons-- > 0) {
+ int refs;
+
+ anon = *aloans++;
+ simple_lock(&anon->an_lock);
+ refs = --anon->an_ref;
+ simple_unlock(&anon->an_lock);
+
+ if (refs == 0) {
+ uvm_anfree(anon); /* last reference: kill anon */
+ }
+ }
+}
+
+/*
+ * uvm_unloanpage: kill loans on pages loaned out to the kernel
+ *
+ * => we expect all our resources to be unlocked
+ */
+
+void
+uvm_unloanpage(ploans, npages)
+ struct vm_page **ploans;
+ int npages;
+{
+ struct vm_page *pg;
+
+ uvm_lock_pageq();
+
+ while (npages-- > 0) {
+ pg = *ploans++;
+
+ if (pg->loan_count < 1)
+ panic("uvm_unloanpage: page %p isn't loaned", pg);
+
+ pg->loan_count--; /* drop loan */
+ uvm_pageunwire(pg); /* and unwire */
+
+ /*
+ * if page is unowned and we killed last loan, then we can
+ * free it
+ */
+ if (pg->loan_count == 0 && pg->uobject == NULL &&
+ pg->uanon == NULL) {
+
+ if (pg->flags & PG_BUSY)
+ panic("uvm_unloanpage: page %p unowned but PG_BUSY!", pg);
+
+ /* be safe */
+ pmap_page_protect(PMAP_PGARG(pg), VM_PROT_NONE);
+ uvm_pagefree(pg); /* pageq locked above */
+
+ }
+ }
+
+ uvm_unlock_pageq();
+}
+
diff --git a/sys/uvm/uvm_loan.h b/sys/uvm/uvm_loan.h
new file mode 100644
index 00000000000..af99b357cf5
--- /dev/null
+++ b/sys/uvm/uvm_loan.h
@@ -0,0 +1,59 @@
+/* $NetBSD: uvm_loan.h,v 1.5 1998/08/13 02:11:01 eeh Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ *
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * from: Id: uvm_loan.h,v 1.1.4.1 1997/12/08 16:07:14 chuck Exp
+ */
+
+#ifndef _UVM_UVM_LOAN_H_
+#define _UVM_UVM_LOAN_H_
+
+/*
+ * flags for uvm_loan
+ */
+
+#define UVM_LOAN_TOANON 0x1 /* loan to anon */
+#define UVM_LOAN_TOPAGE 0x2 /* loan to page */
+
+/*
+ * loan prototypes
+ */
+
+int uvm_loan __P((struct vm_map *, vaddr_t, vsize_t, void **, int));
+void uvm_unloananon __P((struct vm_anon **, int));
+void uvm_unloanpage __P((struct vm_page **, int));
+
+#endif /* _UVM_UVM_LOAN_H_ */
diff --git a/sys/uvm/uvm_map.c b/sys/uvm/uvm_map.c
new file mode 100644
index 00000000000..a5b337db99d
--- /dev/null
+++ b/sys/uvm/uvm_map.c
@@ -0,0 +1,2972 @@
+/* $NetBSD: uvm_map.c,v 1.34 1999/01/24 23:53:15 chuck Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * Copyright (c) 1991, 1993, The Regents of the University of California.
+ *
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor,
+ * Washington University, the University of California, Berkeley and
+ * its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_map.c 8.3 (Berkeley) 1/12/94
+ * from: Id: uvm_map.c,v 1.1.2.27 1998/02/07 01:16:54 chs Exp
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * uvm_map.c: uvm map operations
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mman.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/pool.h>
+
+#include <sys/user.h>
+#include <machine/pcb.h>
+
+#ifdef SYSVSHM
+#include <sys/shm.h>
+#endif
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_kern.h>
+
+#define UVM_MAP
+#include <uvm/uvm.h>
+
+#ifdef DDB
+#include <uvm/uvm_ddb.h>
+#endif
+
+
+struct uvm_cnt uvm_map_call, map_backmerge, map_forwmerge;
+struct uvm_cnt uvm_mlk_call, uvm_mlk_hint;
+
+/*
+ * pool for vmspace structures.
+ */
+
+struct pool uvm_vmspace_pool;
+
+/*
+ * pool for dynamically-allocated map entries.
+ */
+
+struct pool uvm_map_entry_pool;
+
+/*
+ * macros
+ */
+
+/*
+ * uvm_map_entry_link: insert entry into a map
+ *
+ * => map must be locked
+ */
+#define uvm_map_entry_link(map, after_where, entry) do { \
+ (map)->nentries++; \
+ (entry)->prev = (after_where); \
+ (entry)->next = (after_where)->next; \
+ (entry)->prev->next = (entry); \
+ (entry)->next->prev = (entry); \
+} while (0)
+
+/*
+ * uvm_map_entry_unlink: remove entry from a map
+ *
+ * => map must be locked
+ */
+#define uvm_map_entry_unlink(map, entry) do { \
+ (map)->nentries--; \
+ (entry)->next->prev = (entry)->prev; \
+ (entry)->prev->next = (entry)->next; \
+} while (0)
+
+/*
+ * SAVE_HINT: saves the specified entry as the hint for future lookups.
+ *
+ * => map need not be locked (protected by hint_lock).
+ */
+#define SAVE_HINT(map,value) do { \
+ simple_lock(&(map)->hint_lock); \
+ (map)->hint = (value); \
+ simple_unlock(&(map)->hint_lock); \
+} while (0)
+
+/*
+ * VM_MAP_RANGE_CHECK: check and correct range
+ *
+ * => map must at least be read locked
+ */
+
+#define VM_MAP_RANGE_CHECK(map, start, end) do { \
+ if (start < vm_map_min(map)) \
+ start = vm_map_min(map); \
+ if (end > vm_map_max(map)) \
+ end = vm_map_max(map); \
+ if (start > end) \
+ start = end; \
+} while (0)
+
+/*
+ * local prototypes
+ */
+
+static vm_map_entry_t uvm_mapent_alloc __P((vm_map_t));
+static void uvm_mapent_copy __P((vm_map_entry_t,vm_map_entry_t));
+static void uvm_mapent_free __P((vm_map_entry_t));
+static void uvm_map_entry_unwire __P((vm_map_t, vm_map_entry_t));
+
+/*
+ * local inlines
+ */
+
+#undef UVM_MAP_INLINES
+
+#ifdef UVM_MAP_INLINES
+#define UVM_INLINE __inline
+#else
+#define UVM_INLINE
+#endif
+
+/*
+ * uvm_mapent_alloc: allocate a map entry
+ *
+ * => XXX: static pool for kernel map?
+ */
+
+static UVM_INLINE vm_map_entry_t
+uvm_mapent_alloc(map)
+ vm_map_t map;
+{
+ vm_map_entry_t me;
+ int s;
+ UVMHIST_FUNC("uvm_mapent_alloc");
+ UVMHIST_CALLED(maphist);
+
+ if (map->entries_pageable) {
+ me = pool_get(&uvm_map_entry_pool, PR_WAITOK);
+ me->flags = 0;
+ /* me can't be null, wait ok */
+
+ } else {
+ s = splimp(); /* protect kentry_free list with splimp */
+ simple_lock(&uvm.kentry_lock);
+ me = uvm.kentry_free;
+ if (me) uvm.kentry_free = me->next;
+ simple_unlock(&uvm.kentry_lock);
+ splx(s);
+ if (!me)
+ panic("mapent_alloc: out of kernel map entries, check MAX_KMAPENT");
+ me->flags = UVM_MAP_STATIC;
+ }
+
+ UVMHIST_LOG(maphist, "<- new entry=0x%x [pageable=%d]",
+ me, map->entries_pageable, 0, 0);
+ return(me);
+
+}
+
+/*
+ * uvm_mapent_free: free map entry
+ *
+ * => XXX: static pool for kernel map?
+ */
+
+static UVM_INLINE void
+uvm_mapent_free(me)
+ vm_map_entry_t me;
+{
+ int s;
+ UVMHIST_FUNC("uvm_mapent_free");
+ UVMHIST_CALLED(maphist);
+ UVMHIST_LOG(maphist,"<- freeing map entry=0x%x [flags=%d]",
+ me, me->flags, 0, 0);
+ if ((me->flags & UVM_MAP_STATIC) == 0) {
+ pool_put(&uvm_map_entry_pool, me);
+ } else {
+ s = splimp(); /* protect kentry_free list with splimp */
+ simple_lock(&uvm.kentry_lock);
+ me->next = uvm.kentry_free;
+ uvm.kentry_free = me;
+ simple_unlock(&uvm.kentry_lock);
+ splx(s);
+ }
+}
+
+/*
+ * uvm_mapent_copy: copy a map entry, preserving flags
+ */
+
+static UVM_INLINE void
+uvm_mapent_copy(src, dst)
+ vm_map_entry_t src;
+ vm_map_entry_t dst;
+{
+
+ bcopy(src, dst, ((char *)&src->uvm_map_entry_stop_copy) - ((char*)src));
+}
+
+/*
+ * uvm_map_entry_unwire: unwire a map entry
+ *
+ * => map should be locked by caller
+ */
+
+static UVM_INLINE void
+uvm_map_entry_unwire(map, entry)
+ vm_map_t map;
+ vm_map_entry_t entry;
+{
+
+ uvm_fault_unwire(map->pmap, entry->start, entry->end);
+ entry->wired_count = 0;
+}
+
+/*
+ * uvm_map_init: init mapping system at boot time. note that we allocate
+ * and init the static pool of vm_map_entry_t's for the kernel here.
+ */
+
+void
+uvm_map_init()
+{
+ static struct vm_map_entry kernel_map_entry[MAX_KMAPENT];
+#if defined(UVMHIST)
+ static struct uvm_history_ent maphistbuf[100];
+ static struct uvm_history_ent pdhistbuf[100];
+#endif
+ int lcv;
+
+ /*
+ * first, init logging system.
+ */
+
+ UVMHIST_FUNC("uvm_map_init");
+ UVMHIST_INIT_STATIC(maphist, maphistbuf);
+ UVMHIST_INIT_STATIC(pdhist, pdhistbuf);
+ UVMHIST_CALLED(maphist);
+ UVMHIST_LOG(maphist,"<starting uvm map system>", 0, 0, 0, 0);
+ UVMCNT_INIT(uvm_map_call, UVMCNT_CNT, 0,
+ "# uvm_map() successful calls", 0);
+ UVMCNT_INIT(map_backmerge, UVMCNT_CNT, 0, "# uvm_map() back merges", 0);
+ UVMCNT_INIT(map_forwmerge, UVMCNT_CNT, 0, "# uvm_map() missed forward",
+ 0);
+ UVMCNT_INIT(uvm_mlk_call, UVMCNT_CNT, 0, "# map lookup calls", 0);
+ UVMCNT_INIT(uvm_mlk_hint, UVMCNT_CNT, 0, "# map lookup hint hits", 0);
+
+ /*
+ * now set up static pool of kernel map entries ...
+ */
+
+ simple_lock_init(&uvm.kentry_lock);
+ uvm.kentry_free = NULL;
+ for (lcv = 0 ; lcv < MAX_KMAPENT ; lcv++) {
+ kernel_map_entry[lcv].next = uvm.kentry_free;
+ uvm.kentry_free = &kernel_map_entry[lcv];
+ }
+
+ /*
+ * initialize the map-related pools.
+ */
+ pool_init(&uvm_vmspace_pool, sizeof(struct vmspace),
+ 0, 0, 0, "vmsppl", 0,
+ pool_page_alloc_nointr, pool_page_free_nointr, M_VMMAP);
+ pool_init(&uvm_map_entry_pool, sizeof(struct vm_map_entry),
+ 0, 0, 0, "vmmpepl", 0,
+ pool_page_alloc_nointr, pool_page_free_nointr, M_VMMAP);
+}
+
+/*
+ * clippers
+ */
+
+/*
+ * uvm_map_clip_start: ensure that the entry begins at or after
+ * the starting address, if it doesn't we split the entry.
+ *
+ * => caller should use UVM_MAP_CLIP_START macro rather than calling
+ * this directly
+ * => map must be locked by caller
+ */
+
+void uvm_map_clip_start(map, entry, start)
+ vm_map_t map;
+ vm_map_entry_t entry;
+ vaddr_t start;
+{
+ vm_map_entry_t new_entry;
+ vaddr_t new_adj;
+
+ /* uvm_map_simplify_entry(map, entry); */ /* XXX */
+
+ /*
+ * Split off the front portion. note that we must insert the new
+ * entry BEFORE this one, so that this entry has the specified
+ * starting address.
+ */
+
+ new_entry = uvm_mapent_alloc(map);
+ uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
+
+ new_entry->end = start;
+ new_adj = start - new_entry->start;
+ if (entry->object.uvm_obj)
+ entry->offset += new_adj; /* shift start over */
+ entry->start = start;
+
+ if (new_entry->aref.ar_amap) {
+ amap_splitref(&new_entry->aref, &entry->aref, new_adj);
+ }
+
+ uvm_map_entry_link(map, entry->prev, new_entry);
+
+ if (UVM_ET_ISSUBMAP(entry)) {
+ /* ... unlikely to happen, but play it safe */
+ uvm_map_reference(new_entry->object.sub_map);
+ } else {
+ if (UVM_ET_ISOBJ(entry) &&
+ entry->object.uvm_obj->pgops &&
+ entry->object.uvm_obj->pgops->pgo_reference)
+ entry->object.uvm_obj->pgops->pgo_reference(
+ entry->object.uvm_obj);
+ }
+}
+
+/*
+ * uvm_map_clip_end: ensure that the entry ends at or before
+ * the ending address, if it doesn't we split the entry
+ *
+ * => caller should use UVM_MAP_CLIP_END macro rather than calling
+ * this directly
+ * => map must be locked by caller
+ */
+
+void
+uvm_map_clip_end(map, entry, end)
+ vm_map_t map;
+ vm_map_entry_t entry;
+ vaddr_t end;
+{
+ vm_map_entry_t new_entry;
+ vaddr_t new_adj; /* #bytes we move start forward */
+
+ /*
+ * Create a new entry and insert it
+ * AFTER the specified entry
+ */
+
+ new_entry = uvm_mapent_alloc(map);
+ uvm_mapent_copy(entry, new_entry); /* entry -> new_entry */
+
+ new_entry->start = entry->end = end;
+ new_adj = end - entry->start;
+ if (new_entry->object.uvm_obj)
+ new_entry->offset += new_adj;
+
+ if (entry->aref.ar_amap)
+ amap_splitref(&entry->aref, &new_entry->aref, new_adj);
+
+ uvm_map_entry_link(map, entry, new_entry);
+
+ if (UVM_ET_ISSUBMAP(entry)) {
+ /* ... unlikely to happen, but play it safe */
+ uvm_map_reference(new_entry->object.sub_map);
+ } else {
+ if (UVM_ET_ISOBJ(entry) &&
+ entry->object.uvm_obj->pgops &&
+ entry->object.uvm_obj->pgops->pgo_reference)
+ entry->object.uvm_obj->pgops->pgo_reference(
+ entry->object.uvm_obj);
+ }
+}
+
+
+/*
+ * M A P - m a i n e n t r y p o i n t
+ */
+/*
+ * uvm_map: establish a valid mapping in a map
+ *
+ * => assume startp is page aligned.
+ * => assume size is a multiple of PAGE_SIZE.
+ * => assume sys_mmap provides enough of a "hint" to have us skip
+ * over text/data/bss area.
+ * => map must be unlocked (we will lock it)
+ * => <uobj,uoffset> value meanings (4 cases):
+ * [1] <NULL,uoffset> == uoffset is a hint for PMAP_PREFER
+ * [2] <NULL,UVM_UNKNOWN_OFFSET> == don't PMAP_PREFER
+ * [3] <uobj,uoffset> == normal mapping
+ * [4] <uobj,UVM_UNKNOWN_OFFSET> == uvm_map finds offset based on VA
+ *
+ * case [4] is for kernel mappings where we don't know the offset until
+ * we've found a virtual address. note that kernel object offsets are
+ * always relative to vm_map_min(kernel_map).
+ * => XXXCDC: need way to map in external amap?
+ */
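+
+/*
+ * illustrative sketch (not part of the original import): a typical
+ * case [4] call reserving demand-zero kernel VA backed by
+ * uvm.kernel_object, letting uvm_map compute the object offset from
+ * the VA it picks (compare uvm_km_valloc() in uvm_km.c):
+ *
+ *	vaddr_t va = vm_map_min(kernel_map);
+ *	if (uvm_map(kernel_map, &va, size, uvm.kernel_object,
+ *	    UVM_UNKNOWN_OFFSET, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL,
+ *	    UVM_INH_NONE, UVM_ADV_RANDOM, 0)) != KERN_SUCCESS)
+ *		... handle "no space" failure ...
+ */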
+
+int
+uvm_map(map, startp, size, uobj, uoffset, flags)
+ vm_map_t map;
+ vaddr_t *startp; /* IN/OUT */
+ vsize_t size;
+ struct uvm_object *uobj;
+ vaddr_t uoffset;
+ uvm_flag_t flags;
+{
+ vm_map_entry_t prev_entry, new_entry;
+ vm_prot_t prot = UVM_PROTECTION(flags), maxprot =
+ UVM_MAXPROTECTION(flags);
+ vm_inherit_t inherit = UVM_INHERIT(flags);
+ int advice = UVM_ADVICE(flags);
+ UVMHIST_FUNC("uvm_map");
+ UVMHIST_CALLED(maphist);
+
+ UVMHIST_LOG(maphist, "(map=0x%x, *startp=0x%x, size=%d, flags=0x%x)",
+ map, *startp, size, flags);
+ UVMHIST_LOG(maphist, " uobj/offset 0x%x/%d", uobj, uoffset,0,0);
+
+ /*
+ * step 0: sanity check of protection code
+ */
+
+ if ((prot & maxprot) != prot) {
+ UVMHIST_LOG(maphist, "<- prot. failure: prot=0x%x, max=0x%x",
+ prot, maxprot,0,0);
+ return(KERN_PROTECTION_FAILURE);
+ }
+
+ /*
+ * step 1: figure out where to put new VM range
+ */
+
+ if (vm_map_lock_try(map) == FALSE) {
+ if (flags & UVM_FLAG_TRYLOCK)
+ return(KERN_FAILURE);
+ vm_map_lock(map); /* could sleep here */
+ }
+ if ((prev_entry = uvm_map_findspace(map, *startp, size, startp,
+ uobj, uoffset, flags & UVM_FLAG_FIXED)) == NULL) {
+ UVMHIST_LOG(maphist,"<- uvm_map_findspace failed!",0,0,0,0);
+ vm_map_unlock(map);
+ return (KERN_NO_SPACE);
+ }
+
+#if defined(PMAP_GROWKERNEL) /* hack */
+ {
+ /* locked by kernel_map lock */
+ static vaddr_t maxkaddr = 0;
+
+ /*
+ * hack: grow kernel PTPs in advance.
+ */
+ if (map == kernel_map && maxkaddr < (*startp + size)) {
+ pmap_growkernel(*startp + size);
+ maxkaddr = *startp + size;
+ }
+ }
+#endif
+
+ UVMCNT_INCR(uvm_map_call);
+
+ /*
+ * if uobj is null, then uoffset is either a VAC hint for PMAP_PREFER
+ * [typically from uvm_map_reserve] or it is UVM_UNKNOWN_OFFSET. in
+ * either case we want to zero it before storing it in the map entry
+ * (because it looks strange and confusing when debugging...)
+ *
+ * if uobj is not null
+ * if uoffset is not UVM_UNKNOWN_OFFSET then we have a normal mapping
+ * and we do not need to change uoffset.
+ * if uoffset is UVM_UNKNOWN_OFFSET then we need to find the offset
+ * now (based on the starting address of the map). this case is
+ * for kernel object mappings where we don't know the offset until
+ * the virtual address is found (with uvm_map_findspace). the
+ * offset is the distance we are from the start of the map.
+ */
+
+ if (uobj == NULL) {
+ uoffset = 0;
+ } else {
+ if (uoffset == UVM_UNKNOWN_OFFSET) {
+#ifdef DIAGNOSTIC
+ if (uobj->uo_refs != UVM_OBJ_KERN)
+ panic("uvm_map: unknown offset with non-kernel object");
+#endif
+ uoffset = *startp - vm_map_min(kernel_map);
+ }
+ }
+
+ /*
+ * step 2: try and insert in map by extending previous entry, if
+ * possible
+ * XXX: we don't try and pull back the next entry. might be useful
+ * for a stack, but we are currently allocating our stack in advance.
+ */
+
+ if ((flags & UVM_FLAG_NOMERGE) == 0 &&
+ prev_entry->end == *startp && prev_entry != &map->header &&
+ prev_entry->object.uvm_obj == uobj) {
+
+ if (uobj && prev_entry->offset +
+ (prev_entry->end - prev_entry->start) != uoffset)
+ goto step3;
+
+ if (UVM_ET_ISSUBMAP(prev_entry))
+ goto step3;
+
+ if (prev_entry->protection != prot ||
+ prev_entry->max_protection != maxprot)
+ goto step3;
+
+ if (prev_entry->inheritance != inherit ||
+ prev_entry->advice != advice)
+ goto step3;
+
+		/* wired counts must match (new area is unwired) */
+ if (prev_entry->wired_count)
+ goto step3;
+
+ /*
+ * can't extend a shared amap. note: no need to lock amap to
+ * look at refs since we don't care about its exact value.
+		 * if it is one (i.e. we have the only reference) it will stay there
+ */
+
+ if (prev_entry->aref.ar_amap &&
+ amap_refs(prev_entry->aref.ar_amap) != 1) {
+ goto step3;
+ }
+
+ /* got it! */
+
+ UVMCNT_INCR(map_backmerge);
+ UVMHIST_LOG(maphist," starting back merge", 0, 0, 0, 0);
+
+ /*
+ * drop our reference to uobj since we are extending a reference
+ * that we already have (the ref count can not drop to zero).
+ */
+ if (uobj && uobj->pgops->pgo_detach)
+ uobj->pgops->pgo_detach(uobj);
+
+ if (prev_entry->aref.ar_amap) {
+ amap_extend(prev_entry, size);
+ }
+
+ prev_entry->end += size;
+ map->size += size;
+
+ UVMHIST_LOG(maphist,"<- done (via backmerge)!", 0, 0, 0, 0);
+ vm_map_unlock(map);
+ return (KERN_SUCCESS);
+
+ }
+step3:
+ UVMHIST_LOG(maphist," allocating new map entry", 0, 0, 0, 0);
+
+ /*
+ * check for possible forward merge (which we don't do) and count
+ * the number of times we missed a *possible* chance to merge more
+ */
+
+ if ((flags & UVM_FLAG_NOMERGE) == 0 &&
+ prev_entry->next != &map->header &&
+ prev_entry->next->start == (*startp + size))
+ UVMCNT_INCR(map_forwmerge);
+
+ /*
+ * step 3: allocate new entry and link it in
+ */
+
+ new_entry = uvm_mapent_alloc(map);
+ new_entry->start = *startp;
+ new_entry->end = new_entry->start + size;
+ new_entry->object.uvm_obj = uobj;
+ new_entry->offset = uoffset;
+
+ if (uobj)
+ new_entry->etype = UVM_ET_OBJ;
+ else
+ new_entry->etype = 0;
+
+ if (flags & UVM_FLAG_COPYONW) {
+ new_entry->etype |= UVM_ET_COPYONWRITE;
+ if ((flags & UVM_FLAG_OVERLAY) == 0)
+ new_entry->etype |= UVM_ET_NEEDSCOPY;
+ }
+
+ new_entry->protection = prot;
+ new_entry->max_protection = maxprot;
+ new_entry->inheritance = inherit;
+ new_entry->wired_count = 0;
+ new_entry->advice = advice;
+ if (flags & UVM_FLAG_OVERLAY) {
+ /*
+ * to_add: for BSS we overallocate a little since we
+ * are likely to extend
+ */
+ vaddr_t to_add = (flags & UVM_FLAG_AMAPPAD) ?
+ UVM_AMAP_CHUNK << PAGE_SHIFT : 0;
+ struct vm_amap *amap = amap_alloc(size, to_add, M_WAITOK);
+ new_entry->aref.ar_pageoff = 0;
+ new_entry->aref.ar_amap = amap;
+ } else {
+ new_entry->aref.ar_amap = NULL;
+ }
+
+ uvm_map_entry_link(map, prev_entry, new_entry);
+
+ map->size += size;
+
+ /*
+ * Update the free space hint
+ */
+
+ if ((map->first_free == prev_entry) &&
+ (prev_entry->end >= new_entry->start))
+ map->first_free = new_entry;
+
+ UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
+ vm_map_unlock(map);
+ return(KERN_SUCCESS);
+}
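+
+/*
+ * example (an illustrative sketch only; "va" and "len" are hypothetical
+ * and assumed to obey the alignment rules above): an anonymous,
+ * zero-fill, copy-on-write mapping is case [2] above, i.e. a NULL
+ * object with UVM_UNKNOWN_OFFSET:
+ *
+ *	if (uvm_map(map, &va, len, NULL, UVM_UNKNOWN_OFFSET,
+ *	    UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE,
+ *	    UVM_ADV_NORMAL, UVM_FLAG_COPYONW)) != KERN_SUCCESS)
+ *		return(ENOMEM);
+ */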
+
+/*
+ * uvm_map_lookup_entry: find map entry at or before an address
+ *
+ * => map must at least be read-locked by caller
+ * => entry is returned in "entry"
+ * => return value is true if address is in the returned entry
+ */
+
+boolean_t
+uvm_map_lookup_entry(map, address, entry)
+ vm_map_t map;
+ vaddr_t address;
+ vm_map_entry_t *entry; /* OUT */
+{
+ vm_map_entry_t cur;
+ vm_map_entry_t last;
+ UVMHIST_FUNC("uvm_map_lookup_entry");
+ UVMHIST_CALLED(maphist);
+
+ UVMHIST_LOG(maphist,"(map=0x%x,addr=0x%x,ent=0x%x)",
+ map, address, entry, 0);
+
+ /*
+ * start looking either from the head of the
+ * list, or from the hint.
+ */
+
+ simple_lock(&map->hint_lock);
+ cur = map->hint;
+ simple_unlock(&map->hint_lock);
+
+ if (cur == &map->header)
+ cur = cur->next;
+
+ UVMCNT_INCR(uvm_mlk_call);
+ if (address >= cur->start) {
+ /*
+ * go from hint to end of list.
+ *
+ * but first, make a quick check to see if
+ * we are already looking at the entry we
+ * want (which is usually the case).
+ * note also that we don't need to save the hint
+ * here... it is the same hint (unless we are
+ * at the header, in which case the hint didn't
+ * buy us anything anyway).
+ */
+ last = &map->header;
+ if ((cur != last) && (cur->end > address)) {
+ UVMCNT_INCR(uvm_mlk_hint);
+ *entry = cur;
+ UVMHIST_LOG(maphist,"<- got it via hint (0x%x)",
+ cur, 0, 0, 0);
+ return (TRUE);
+ }
+ } else {
+ /*
+ * go from start to hint, *inclusively*
+ */
+ last = cur->next;
+ cur = map->header.next;
+ }
+
+ /*
+ * search linearly
+ */
+
+ while (cur != last) {
+ if (cur->end > address) {
+ if (address >= cur->start) {
+ /*
+ * save this lookup for future
+ * hints, and return
+ */
+
+ *entry = cur;
+ SAVE_HINT(map, cur);
+ UVMHIST_LOG(maphist,"<- search got it (0x%x)",
+ cur, 0, 0, 0);
+ return (TRUE);
+ }
+ break;
+ }
+ cur = cur->next;
+ }
+ *entry = cur->prev;
+ SAVE_HINT(map, *entry);
+ UVMHIST_LOG(maphist,"<- failed!",0,0,0,0);
+ return (FALSE);
+}
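+
+/*
+ * example (an illustrative sketch; "va", "ent" and "mapped" are
+ * hypothetical locals): with the map at least read-locked, a caller
+ * can test whether an address is currently mapped:
+ *
+ *	vm_map_entry_t ent;
+ *	boolean_t mapped;
+ *
+ *	vm_map_lock_read(map);
+ *	mapped = uvm_map_lookup_entry(map, va, &ent);
+ *	vm_map_unlock_read(map);
+ */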
+
+
+/*
+ * uvm_map_findspace: find "length" sized space in "map".
+ *
+ * => "hint" is a hint about where we want it, unless fixed is true
+ * (in which case we insist on using "hint").
+ * => "result" is VA returned
+ * => uobj/uoffset are to be used to handle VAC alignment, if required
+ * => caller must at least have read-locked map
+ * => returns NULL on failure, or pointer to prev. map entry if success
+ * => note this is a cross between the old vm_map_findspace and vm_map_find
+ */
+
+vm_map_entry_t
+uvm_map_findspace(map, hint, length, result, uobj, uoffset, fixed)
+ vm_map_t map;
+ vaddr_t hint;
+ vsize_t length;
+ vaddr_t *result; /* OUT */
+ struct uvm_object *uobj;
+ vaddr_t uoffset;
+ boolean_t fixed;
+{
+ vm_map_entry_t entry, next, tmp;
+ vaddr_t end;
+ UVMHIST_FUNC("uvm_map_findspace");
+ UVMHIST_CALLED(maphist);
+
+ UVMHIST_LOG(maphist, "(map=0x%x, hint=0x%x, len=%d, fixed=%d)",
+ map, hint, length, fixed);
+
+ if (hint < map->min_offset) { /* check ranges ... */
+ if (fixed) {
+ UVMHIST_LOG(maphist,"<- VA below map range",0,0,0,0);
+ return(NULL);
+ }
+ hint = map->min_offset;
+ }
+ if (hint > map->max_offset) {
+ UVMHIST_LOG(maphist,"<- VA 0x%x > range [0x%x->0x%x]",
+ hint, map->min_offset, map->max_offset, 0);
+ return(NULL);
+ }
+
+ /*
+ * Look for the first possible address; if there's already
+ * something at this address, we have to start after it.
+ */
+
+ if (!fixed && hint == map->min_offset) {
+ if ((entry = map->first_free) != &map->header)
+ hint = entry->end;
+ } else {
+ if (uvm_map_lookup_entry(map, hint, &tmp)) {
+ /* "hint" address already in use ... */
+ if (fixed) {
+ UVMHIST_LOG(maphist,"<- fixed & VA in use",
+ 0, 0, 0, 0);
+ return(NULL);
+ }
+ hint = tmp->end;
+ }
+ entry = tmp;
+ }
+
+ /*
+ * Look through the rest of the map, trying to fit a new region in
+ * the gap between existing regions, or after the very last region.
+ * note: entry->end = base VA of current gap,
+ * next->start = VA of end of current gap
+ */
+ for (;; hint = (entry = next)->end) {
+ /*
+ * Find the end of the proposed new region. Be sure we didn't
+ * go beyond the end of the map, or wrap around the address;
+ * if so, we lose. Otherwise, if this is the last entry, or
+ * if the proposed new region fits before the next entry, we
+ * win.
+ */
+
+#ifdef PMAP_PREFER
+ /*
+ * push hint forward as needed to avoid VAC alias problems.
+ * we only do this if a valid offset is specified.
+ */
+ if (!fixed && uoffset != UVM_UNKNOWN_OFFSET)
+ PMAP_PREFER(uoffset, &hint);
+#endif
+ end = hint + length;
+ if (end > map->max_offset || end < hint) {
+ UVMHIST_LOG(maphist,"<- failed (off end)", 0,0,0,0);
+ return (NULL);
+ }
+ next = entry->next;
+ if (next == &map->header || next->start >= end)
+ break;
+ if (fixed) {
+ UVMHIST_LOG(maphist,"<- fixed mapping failed", 0,0,0,0);
+ return(NULL); /* only one shot at it ... */
+ }
+ }
+ SAVE_HINT(map, entry);
+ *result = hint;
+ UVMHIST_LOG(maphist,"<- got it! (result=0x%x)", hint, 0,0,0);
+ return (entry);
+}
+
+/*
+ * U N M A P - m a i n h e l p e r f u n c t i o n s
+ */
+
+/*
+ * uvm_unmap_remove: remove mappings from a vm_map (from "start" up to "stop")
+ *
+ * => caller must check alignment and size
+ * => map must be locked by caller
+ * => we return a list of map entries that we've removed from the map
+ * in "entry_list"
+ */
+
+int
+uvm_unmap_remove(map, start, end, entry_list)
+ vm_map_t map;
+ vaddr_t start,end;
+ vm_map_entry_t *entry_list; /* OUT */
+{
+ vm_map_entry_t entry, first_entry, next;
+ vaddr_t len;
+ UVMHIST_FUNC("uvm_unmap_remove");
+ UVMHIST_CALLED(maphist);
+
+ UVMHIST_LOG(maphist,"(map=0x%x, start=0x%x, end=0x%x)",
+ map, start, end, 0);
+
+ VM_MAP_RANGE_CHECK(map, start, end);
+
+ /*
+ * find first entry
+ */
+ if (uvm_map_lookup_entry(map, start, &first_entry) == TRUE) {
+ /* clip and go... */
+ entry = first_entry;
+ UVM_MAP_CLIP_START(map, entry, start);
+ /* critical! prevents stale hint */
+ SAVE_HINT(map, entry->prev);
+
+ } else {
+ entry = first_entry->next;
+ }
+
+ /*
+ * Save the free space hint
+ */
+
+ if (map->first_free->start >= start)
+ map->first_free = entry->prev;
+
+ /*
+ * note: we now re-use first_entry for a different task. we remove
+ * a number of map entries from the map and save them in a linked
+ * list headed by "first_entry". once we remove them from the map
+ * the caller should unlock the map and drop the references to the
+ * backing objects [c.f. uvm_unmap_detach]. the object is to
+	 * separate unmapping from reference dropping.  why?
+ * [1] the map has to be locked for unmapping
+ * [2] the map need not be locked for reference dropping
+ * [3] dropping references may trigger pager I/O, and if we hit
+ * a pager that does synchronous I/O we may have to wait for it.
+ * [4] we would like all waiting for I/O to occur with maps unlocked
+ * so that we don't block other threads.
+ */
+ first_entry = NULL;
+ *entry_list = NULL; /* to be safe */
+
+ /*
+ * break up the area into map entry sized regions and unmap. note
+ * that all mappings have to be removed before we can even consider
+ * dropping references to amaps or VM objects (otherwise we could end
+ * up with a mapping to a page on the free list which would be very bad)
+ */
+
+ while ((entry != &map->header) && (entry->start < end)) {
+
+ UVM_MAP_CLIP_END(map, entry, end);
+ next = entry->next;
+ len = entry->end - entry->start;
+
+ /*
+ * unwire before removing addresses from the pmap; otherwise
+ * unwiring will put the entries back into the pmap (XXX).
+ */
+
+ if (entry->wired_count)
+ uvm_map_entry_unwire(map, entry);
+
+ /*
+ * special case: handle mappings to anonymous kernel objects.
+ * we want to free these pages right away...
+ */
+ if (UVM_ET_ISOBJ(entry) &&
+ entry->object.uvm_obj->uo_refs == UVM_OBJ_KERN) {
+
+#ifdef DIAGNOSTIC
+ if (vm_map_pmap(map) != pmap_kernel())
+ panic("uvm_unmap_remove: kernel object mapped by non-kernel map");
+#endif
+
+ /*
+ * note: kernel object mappings are currently used in
+ * two ways:
+ * [1] "normal" mappings of pages in the kernel object
+ * [2] uvm_km_valloc'd allocations in which we
+ * pmap_enter in some non-kernel-object page
+ * (e.g. vmapbuf).
+ *
+ * for case [1], we need to remove the mapping from
+ * the pmap and then remove the page from the kernel
+ * object (because, once pages in a kernel object are
+ * unmapped they are no longer needed, unlike, say,
+ * a vnode where you might want the data to persist
+ * until flushed out of a queue).
+ *
+ * for case [2], we need to remove the mapping from
+ * the pmap. there shouldn't be any pages at the
+ * specified offset in the kernel object [but it
+ * doesn't hurt to call uvm_km_pgremove just to be
+ * safe?]
+ *
+ * uvm_km_pgremove currently does the following:
+ * for pages in the kernel object in range:
+ * - pmap_page_protect them out of all pmaps
+ * - uvm_pagefree the page
+ *
+ * note that in case [1] the pmap_page_protect call
+ * in uvm_km_pgremove may very well be redundant
+ * because we have already removed the mappings
+ * beforehand with pmap_remove (or pmap_kremove).
+ * in the PMAP_NEW case, the pmap_page_protect call
+ * may not do anything, since PMAP_NEW allows the
+ * kernel to enter/remove kernel mappings without
+			 * bothering to keep track of the mappings (e.g. via
+			 * pv_entry lists).  XXX: because of this, we should
+			 * consider removing the pmap_page_protect call from
+			 * uvm_km_pgremove at some point in the future.
+ */
+
+ /*
+ * remove mappings from pmap
+ */
+#if defined(PMAP_NEW)
+ pmap_kremove(entry->start, len);
+#else
+ pmap_remove(pmap_kernel(), entry->start,
+ entry->start+len);
+#endif
+
+ /*
+ * remove pages from a kernel object (offsets are
+ * always relative to vm_map_min(kernel_map)).
+ */
+ uvm_km_pgremove(entry->object.uvm_obj,
+ entry->start - vm_map_min(kernel_map),
+ entry->end - vm_map_min(kernel_map));
+
+ /*
+ * null out kernel_object reference, we've just
+ * dropped it
+ */
+ entry->etype &= ~UVM_ET_OBJ;
+ entry->object.uvm_obj = NULL; /* to be safe */
+
+ } else {
+ /*
+ * remove mappings the standard way.
+ */
+ pmap_remove(map->pmap, entry->start, entry->end);
+ }
+
+ /*
+ * remove entry from map and put it on our list of entries
+ * that we've nuked. then go do next entry.
+ */
+ UVMHIST_LOG(maphist, " removed map entry 0x%x", entry, 0, 0,0);
+ uvm_map_entry_unlink(map, entry);
+ map->size -= len;
+ entry->next = first_entry;
+ first_entry = entry;
+ entry = next; /* next entry, please */
+ }
+
+ /*
+ * now we've cleaned up the map and are ready for the caller to drop
+ * references to the mapped objects.
+ */
+
+ *entry_list = first_entry;
+ UVMHIST_LOG(maphist,"<- done!", 0, 0, 0, 0);
+ return(KERN_SUCCESS);
+}
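+
+/*
+ * example (an illustrative sketch; "dead" is a hypothetical local):
+ * the usual calling pattern holds the map lock only for the remove
+ * step and drops the object references afterwards, as explained above:
+ *
+ *	vm_map_entry_t dead;
+ *
+ *	vm_map_lock(map);
+ *	(void) uvm_unmap_remove(map, start, end, &dead);
+ *	vm_map_unlock(map);
+ *	if (dead)
+ *		uvm_unmap_detach(dead, 0);
+ */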
+
+/*
+ * uvm_unmap_detach: drop references in a chain of map entries
+ *
+ * => we will free the map entries as we traverse the list.
+ */
+
+void
+uvm_unmap_detach(first_entry, amap_unref_flags)
+ vm_map_entry_t first_entry;
+ int amap_unref_flags;
+{
+ vm_map_entry_t next_entry;
+ UVMHIST_FUNC("uvm_unmap_detach"); UVMHIST_CALLED(maphist);
+
+ while (first_entry) {
+
+#ifdef DIAGNOSTIC
+ /*
+ * sanity check
+ */
+ /* was part of vm_map_entry_delete() */
+ if (first_entry->wired_count)
+ panic("unmap: still wired!");
+#endif
+
+ UVMHIST_LOG(maphist,
+ " detach 0x%x: amap=0x%x, obj=0x%x, submap?=%d",
+ first_entry, first_entry->aref.ar_amap,
+ first_entry->object.uvm_obj,
+ UVM_ET_ISSUBMAP(first_entry));
+
+ /*
+ * drop reference to amap, if we've got one
+ */
+
+ if (first_entry->aref.ar_amap)
+ amap_unref(first_entry, amap_unref_flags);
+
+ /*
+ * drop reference to our backing object, if we've got one
+ */
+
+ if (UVM_ET_ISSUBMAP(first_entry)) {
+ /* ... unlikely to happen, but play it safe */
+ uvm_map_deallocate(first_entry->object.sub_map);
+ } else {
+ if (UVM_ET_ISOBJ(first_entry) &&
+ first_entry->object.uvm_obj->pgops->pgo_detach)
+ first_entry->object.uvm_obj->pgops->
+ pgo_detach(first_entry->object.uvm_obj);
+ }
+
+ /*
+ * next entry
+ */
+ next_entry = first_entry->next;
+ uvm_mapent_free(first_entry);
+ first_entry = next_entry;
+ }
+
+ /*
+ * done!
+ */
+ UVMHIST_LOG(maphist, "<- done", 0,0,0,0);
+ return;
+}
+
+/*
+ * E X T R A C T I O N F U N C T I O N S
+ */
+
+/*
+ * uvm_map_reserve: reserve space in a vm_map for future use.
+ *
+ * => we reserve space in a map by putting a dummy map entry in the
+ * map (dummy means obj=NULL, amap=NULL, prot=VM_PROT_NONE)
+ * => map should be unlocked (we will write lock it)
+ * => we return true if we were able to reserve space
+ * => XXXCDC: should be inline?
+ */
+
+int
+uvm_map_reserve(map, size, offset, raddr)
+ vm_map_t map;
+ vsize_t size;
+ vaddr_t offset; /* hint for pmap_prefer */
+ vaddr_t *raddr; /* OUT: reserved VA */
+{
+ UVMHIST_FUNC("uvm_map_reserve"); UVMHIST_CALLED(maphist);
+
+ UVMHIST_LOG(maphist, "(map=0x%x, size=0x%x, offset=0x%x,addr=0x%x)",
+ map,size,offset,raddr);
+
+ size = round_page(size);
+ if (*raddr < vm_map_min(map))
+ *raddr = vm_map_min(map); /* hint */
+
+ /*
+ * reserve some virtual space.
+ */
+
+ if (uvm_map(map, raddr, size, NULL, offset,
+ UVM_MAPFLAG(UVM_PROT_NONE, UVM_PROT_NONE, UVM_INH_NONE,
+ UVM_ADV_RANDOM, UVM_FLAG_NOMERGE)) != KERN_SUCCESS) {
+ UVMHIST_LOG(maphist, "<- done (no VM)", 0,0,0,0);
+ return (FALSE);
+ }
+
+ UVMHIST_LOG(maphist, "<- done (*raddr=0x%x)", *raddr,0,0,0);
+ return (TRUE);
+}
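+
+/*
+ * example (an illustrative sketch; "blkva" is hypothetical): reserve a
+ * page of kernel virtual space for later use with uvm_map_replace:
+ *
+ *	vaddr_t blkva = 0;
+ *
+ *	if (uvm_map_reserve(kernel_map, PAGE_SIZE, UVM_UNKNOWN_OFFSET,
+ *	    &blkva) == FALSE)
+ *		return(ENOMEM);
+ */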
+
+/*
+ * uvm_map_replace: replace a reserved (blank) area of memory with
+ * real mappings.
+ *
+ * => caller must WRITE-LOCK the map
+ * => we return TRUE if replacement was a success
+ * => we expect the newents chain to have nnewents entries on it and
+ * we expect newents->prev to point to the last entry on the list
+ * => note newents is allowed to be NULL
+ */
+
+int
+uvm_map_replace(map, start, end, newents, nnewents)
+ struct vm_map *map;
+ vaddr_t start, end;
+ vm_map_entry_t newents;
+ int nnewents;
+{
+ vm_map_entry_t oldent, last;
+ UVMHIST_FUNC("uvm_map_replace");
+ UVMHIST_CALLED(maphist);
+
+ /*
+ * first find the blank map entry at the specified address
+ */
+
+ if (!uvm_map_lookup_entry(map, start, &oldent)) {
+ return(FALSE);
+ }
+
+ /*
+ * check to make sure we have a proper blank entry
+ */
+
+ if (oldent->start != start || oldent->end != end ||
+ oldent->object.uvm_obj != NULL || oldent->aref.ar_amap != NULL) {
+ return (FALSE);
+ }
+
+#ifdef DIAGNOSTIC
+ /*
+ * sanity check the newents chain
+ */
+ {
+ vm_map_entry_t tmpent = newents;
+ int nent = 0;
+ vaddr_t cur = start;
+
+ while (tmpent) {
+ nent++;
+ if (tmpent->start < cur)
+ panic("uvm_map_replace1");
+ if (tmpent->start > tmpent->end || tmpent->end > end) {
+ printf("tmpent->start=0x%lx, tmpent->end=0x%lx, end=0x%lx\n",
+ tmpent->start, tmpent->end, end);
+ panic("uvm_map_replace2");
+ }
+ cur = tmpent->end;
+ if (tmpent->next) {
+ if (tmpent->next->prev != tmpent)
+ panic("uvm_map_replace3");
+ } else {
+ if (newents->prev != tmpent)
+ panic("uvm_map_replace4");
+ }
+ tmpent = tmpent->next;
+ }
+ if (nent != nnewents)
+ panic("uvm_map_replace5");
+ }
+#endif
+
+ /*
+ * map entry is a valid blank! replace it. (this does all the
+ * work of map entry link/unlink...).
+ */
+
+ if (newents) {
+
+ last = newents->prev; /* we expect this */
+
+ /* critical: flush stale hints out of map */
+ SAVE_HINT(map, newents);
+ if (map->first_free == oldent)
+ map->first_free = last;
+
+ last->next = oldent->next;
+ last->next->prev = last;
+ newents->prev = oldent->prev;
+ newents->prev->next = newents;
+ map->nentries = map->nentries + (nnewents - 1);
+
+ } else {
+
+ /* critical: flush stale hints out of map */
+ SAVE_HINT(map, oldent->prev);
+ if (map->first_free == oldent)
+ map->first_free = oldent->prev;
+
+ /* NULL list of new entries: just remove the old one */
+ uvm_map_entry_unlink(map, oldent);
+ }
+
+
+ /*
+ * now we can free the old blank entry, unlock the map and return.
+ */
+
+ uvm_mapent_free(oldent);
+ return(TRUE);
+}
+
+/*
+ * uvm_map_extract: extract a mapping from a map and put it somewhere
+ * (maybe removing the old mapping)
+ *
+ * => maps should be unlocked (we will write lock them)
+ * => returns 0 on success, error code otherwise
+ * => start must be page aligned
+ * => len must be page sized
+ * => flags:
+ * UVM_EXTRACT_REMOVE: remove mappings from srcmap
+ * UVM_EXTRACT_CONTIG: abort if unmapped area (advisory only)
+ * UVM_EXTRACT_QREF: for a temporary extraction do quick obj refs
+ * UVM_EXTRACT_FIXPROT: set prot to maxprot as we go
+ * >>>NOTE: if you set REMOVE, you are not allowed to use CONTIG or QREF!<<<
+ * >>>NOTE: QREF's must be unmapped via the QREF path, thus should only
+ * be used from within the kernel in a kernel level map <<<
+ */
+
+int
+uvm_map_extract(srcmap, start, len, dstmap, dstaddrp, flags)
+ vm_map_t srcmap, dstmap;
+ vaddr_t start, *dstaddrp;
+ vsize_t len;
+ int flags;
+{
+ vaddr_t dstaddr, end, newend, oldoffset, fudge, orig_fudge,
+ oldstart;
+ vm_map_entry_t chain, endchain, entry, orig_entry, newentry, deadentry;
+ vm_map_entry_t oldentry;
+ vsize_t elen;
+ int nchain, error, copy_ok;
+ UVMHIST_FUNC("uvm_map_extract"); UVMHIST_CALLED(maphist);
+ UVMHIST_LOG(maphist,"(srcmap=0x%x,start=0x%x, len=0x%x", srcmap, start,
+ len,0);
+ UVMHIST_LOG(maphist," ...,dstmap=0x%x, flags=0x%x)", dstmap,flags,0,0);
+
+#ifdef DIAGNOSTIC
+ /*
+ * step 0: sanity check: start must be on a page boundary, length
+ * must be page sized. can't ask for CONTIG/QREF if you asked for
+ * REMOVE.
+ */
+ if ((start & PAGE_MASK) || (len & PAGE_MASK))
+ panic("uvm_map_extract1");
+ if (flags & UVM_EXTRACT_REMOVE)
+ if (flags & (UVM_EXTRACT_CONTIG|UVM_EXTRACT_QREF))
+ panic("uvm_map_extract2");
+#endif
+
+
+ /*
+ * step 1: reserve space in the target map for the extracted area
+ */
+
+ dstaddr = *dstaddrp;
+ if (uvm_map_reserve(dstmap, len, start, &dstaddr) == FALSE)
+ return(ENOMEM);
+ *dstaddrp = dstaddr; /* pass address back to caller */
+ UVMHIST_LOG(maphist, " dstaddr=0x%x", dstaddr,0,0,0);
+
+
+ /*
+ * step 2: setup for the extraction process loop by init'ing the
+ * map entry chain, locking src map, and looking up the first useful
+ * entry in the map.
+ */
+
+ end = start + len;
+ newend = dstaddr + len;
+ chain = endchain = NULL;
+ nchain = 0;
+ vm_map_lock(srcmap);
+
+ if (uvm_map_lookup_entry(srcmap, start, &entry)) {
+
+ /* "start" is within an entry */
+ if (flags & UVM_EXTRACT_QREF) {
+ /*
+ * for quick references we don't clip the entry, so
+ * the entry may map space "before" the starting
+ * virtual address... this is the "fudge" factor
+ * (which can be non-zero only the first time
+ * through the "while" loop in step 3).
+ */
+ fudge = start - entry->start;
+ } else {
+ /*
+ * normal reference: we clip the map to fit (thus
+ * fudge is zero)
+ */
+ UVM_MAP_CLIP_START(srcmap, entry, start);
+ SAVE_HINT(srcmap, entry->prev);
+ fudge = 0;
+ }
+
+ } else {
+
+ /* "start" is not within an entry ... skip to next entry */
+ if (flags & UVM_EXTRACT_CONTIG) {
+ error = EINVAL;
+ goto bad; /* definite hole here ... */
+ }
+
+ entry = entry->next;
+ fudge = 0;
+ }
+ /* save values from srcmap for step 6 */
+ orig_entry = entry;
+ orig_fudge = fudge;
+
+
+ /*
+ * step 3: now start looping through the map entries, extracting
+ * as we go.
+ */
+
+ while (entry->start < end && entry != &srcmap->header) {
+
+ /* if we are not doing a quick reference, clip it */
+ if ((flags & UVM_EXTRACT_QREF) == 0)
+ UVM_MAP_CLIP_END(srcmap, entry, end);
+
+ /* clear needs_copy (allow chunking) */
+ if (UVM_ET_ISNEEDSCOPY(entry)) {
+ if (fudge)
+ oldstart = entry->start;
+ else
+ oldstart = 0; /* XXX: gcc */
+ amap_copy(srcmap, entry, M_NOWAIT, TRUE, start, end);
+ if (UVM_ET_ISNEEDSCOPY(entry)) { /* failed? */
+ error = ENOMEM;
+ goto bad;
+ }
+ /* amap_copy could clip (during chunk)! update fudge */
+ if (fudge) {
+ fudge = fudge - (entry->start - oldstart);
+ orig_fudge = fudge;
+ }
+ }
+
+ /* calculate the offset of this from "start" */
+ oldoffset = (entry->start + fudge) - start;
+
+ /* allocate a new map entry */
+ newentry = uvm_mapent_alloc(dstmap);
+ if (newentry == NULL) {
+ error = ENOMEM;
+ goto bad;
+ }
+
+ /* set up new map entry */
+ newentry->next = NULL;
+ newentry->prev = endchain;
+ newentry->start = dstaddr + oldoffset;
+ newentry->end =
+ newentry->start + (entry->end - (entry->start + fudge));
+ if (newentry->end > newend)
+ newentry->end = newend;
+ newentry->object.uvm_obj = entry->object.uvm_obj;
+ if (newentry->object.uvm_obj) {
+ if (newentry->object.uvm_obj->pgops->pgo_reference)
+ newentry->object.uvm_obj->pgops->
+ pgo_reference(newentry->object.uvm_obj);
+ newentry->offset = entry->offset + fudge;
+ } else {
+ newentry->offset = 0;
+ }
+ newentry->etype = entry->etype;
+ newentry->protection = (flags & UVM_EXTRACT_FIXPROT) ?
+ entry->max_protection : entry->protection;
+ newentry->max_protection = entry->max_protection;
+ newentry->inheritance = entry->inheritance;
+ newentry->wired_count = 0;
+ newentry->aref.ar_amap = entry->aref.ar_amap;
+ if (newentry->aref.ar_amap) {
+ newentry->aref.ar_pageoff =
+ entry->aref.ar_pageoff + (fudge >> PAGE_SHIFT);
+ amap_ref(newentry, AMAP_SHARED |
+ ((flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0));
+ } else {
+ newentry->aref.ar_pageoff = 0;
+ }
+ newentry->advice = entry->advice;
+
+ /* now link it on the chain */
+ nchain++;
+ if (endchain == NULL) {
+ chain = endchain = newentry;
+ } else {
+ endchain->next = newentry;
+ endchain = newentry;
+ }
+
+ /* end of 'while' loop! */
+ if ((flags & UVM_EXTRACT_CONTIG) && entry->end < end &&
+ (entry->next == &srcmap->header ||
+ entry->next->start != entry->end)) {
+ error = EINVAL;
+ goto bad;
+ }
+ entry = entry->next;
+ fudge = 0;
+ }
+
+
+ /*
+ * step 4: close off chain (in format expected by uvm_map_replace)
+ */
+
+ if (chain)
+ chain->prev = endchain;
+
+
+ /*
+ * step 5: attempt to lock the dest map so we can pmap_copy.
+ * note usage of copy_ok:
+ * 1 => dstmap locked, pmap_copy ok, and we "replace" here (step 5)
+ * 0 => dstmap unlocked, NO pmap_copy, and we will "replace" in step 7
+ */
+
+ if (srcmap == dstmap || vm_map_lock_try(dstmap) == TRUE) {
+
+ copy_ok = 1;
+ if (!uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain,
+ nchain)) {
+ if (srcmap != dstmap)
+ vm_map_unlock(dstmap);
+ error = EIO;
+ goto bad;
+ }
+
+ } else {
+
+ copy_ok = 0;
+		/* replace deferred until step 7 */
+
+ }
+
+
+ /*
+ * step 6: traverse the srcmap a second time to do the following:
+ * - if we got a lock on the dstmap do pmap_copy
+ * - if UVM_EXTRACT_REMOVE remove the entries
+ * we make use of orig_entry and orig_fudge (saved in step 2)
+ */
+
+ if (copy_ok || (flags & UVM_EXTRACT_REMOVE)) {
+
+ /* purge possible stale hints from srcmap */
+ if (flags & UVM_EXTRACT_REMOVE) {
+ SAVE_HINT(srcmap, orig_entry->prev);
+ if (srcmap->first_free->start >= start)
+ srcmap->first_free = orig_entry->prev;
+ }
+
+ entry = orig_entry;
+ fudge = orig_fudge;
+ deadentry = NULL; /* for UVM_EXTRACT_REMOVE */
+
+ while (entry->start < end && entry != &srcmap->header) {
+
+ if (copy_ok) {
+ oldoffset = (entry->start + fudge) - start;
+ elen = min(end, entry->end) - (entry->start + fudge);
+ pmap_copy(dstmap->pmap, srcmap->pmap, dstaddr + oldoffset,
+ elen, entry->start + fudge);
+ }
+
+ /* we advance "entry" in the following if statement */
+ if (flags & UVM_EXTRACT_REMOVE) {
+ pmap_remove(srcmap->pmap, entry->start,
+ entry->end);
+ oldentry = entry; /* save entry */
+ entry = entry->next; /* advance */
+ uvm_map_entry_unlink(srcmap, oldentry);
+ /* add to dead list */
+ oldentry->next = deadentry;
+ deadentry = oldentry;
+ } else {
+ entry = entry->next; /* advance */
+ }
+
+ /* end of 'while' loop */
+ fudge = 0;
+ }
+
+ /*
+ * unlock dstmap. we will dispose of deadentry in
+ * step 7 if needed
+ */
+ if (copy_ok && srcmap != dstmap)
+ vm_map_unlock(dstmap);
+
+ }
+ else
+ deadentry = NULL; /* XXX: gcc */
+
+ /*
+ * step 7: we are done with the source map, unlock. if copy_ok
+ * is 0 then we have not replaced the dummy mapping in dstmap yet
+ * and we need to do so now.
+ */
+
+ vm_map_unlock(srcmap);
+ if ((flags & UVM_EXTRACT_REMOVE) && deadentry)
+ uvm_unmap_detach(deadentry, 0); /* dispose of old entries */
+
+ /* now do the replacement if we didn't do it in step 5 */
+ if (copy_ok == 0) {
+ vm_map_lock(dstmap);
+ error = uvm_map_replace(dstmap, dstaddr, dstaddr+len, chain,
+ nchain);
+ vm_map_unlock(dstmap);
+
+ if (error == FALSE) {
+ error = EIO;
+ goto bad2;
+ }
+ }
+
+ /*
+ * done!
+ */
+ return(0);
+
+ /*
+ * bad: failure recovery
+ */
+bad:
+ vm_map_unlock(srcmap);
+bad2: /* src already unlocked */
+ if (chain)
+ uvm_unmap_detach(chain,
+ (flags & UVM_EXTRACT_QREF) ? AMAP_REFALL : 0);
+ uvm_unmap(dstmap, dstaddr, dstaddr+len); /* ??? */
+ return(error);
+}
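+
+/*
+ * example (an illustrative sketch; "uva", "len", "kva" and "error" are
+ * hypothetical, with "uva"/"len" obeying the alignment rules above): a
+ * temporary kernel window onto part of a user map can be made with
+ * quick references, leaving the source mappings in place:
+ *
+ *	vaddr_t kva = 0;
+ *
+ *	error = uvm_map_extract(&p->p_vmspace->vm_map, uva, len,
+ *	    kernel_map, &kva,
+ *	    UVM_EXTRACT_QREF | UVM_EXTRACT_CONTIG | UVM_EXTRACT_FIXPROT);
+ */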
+
+/* end of extraction functions */
+
+/*
+ * uvm_map_submap: punch down part of a map into a submap
+ *
+ * => only the kernel_map is allowed to be submapped
+ * => the purpose of submapping is to break up the locking granularity
+ * of a larger map
+ * => the range specified must have been mapped previously with a uvm_map()
+ * call [with uobj==NULL] to create a blank map entry in the main map.
+ * [And it had better still be blank!]
+ * => maps which contain submaps should never be copied or forked.
+ * => to remove a submap, use uvm_unmap() on the main map
+ * and then uvm_map_deallocate() the submap.
+ * => main map must be unlocked.
+ * => submap must have been init'd and have a zero reference count.
+ * [need not be locked as we don't actually reference it]
+ */
+
+int
+uvm_map_submap(map, start, end, submap)
+ vm_map_t map, submap;
+ vaddr_t start, end;
+{
+ vm_map_entry_t entry;
+ int result;
+ UVMHIST_FUNC("uvm_map_submap"); UVMHIST_CALLED(maphist);
+
+ vm_map_lock(map);
+
+ VM_MAP_RANGE_CHECK(map, start, end);
+
+ if (uvm_map_lookup_entry(map, start, &entry)) {
+ UVM_MAP_CLIP_START(map, entry, start);
+ UVM_MAP_CLIP_END(map, entry, end); /* to be safe */
+ }
+ else {
+ entry = NULL;
+ }
+
+ if (entry != NULL &&
+ entry->start == start && entry->end == end &&
+ entry->object.uvm_obj == NULL && entry->aref.ar_amap == NULL &&
+ !UVM_ET_ISCOPYONWRITE(entry) && !UVM_ET_ISNEEDSCOPY(entry)) {
+
+ /*
+ * doit!
+ */
+ entry->etype |= UVM_ET_SUBMAP;
+ entry->object.sub_map = submap;
+ entry->offset = 0;
+ uvm_map_reference(submap);
+ result = KERN_SUCCESS;
+ } else {
+ result = KERN_INVALID_ARGUMENT;
+ }
+ vm_map_unlock(map);
+
+ return(result);
+}
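+
+/*
+ * example (an illustrative sketch; "sva", "ssize" and "mysubmap" are
+ * hypothetical, with "mysubmap" already init'd as described above):
+ * the range is first covered by a blank entry and then punched down
+ * into the submap:
+ *
+ *	if (uvm_map(kernel_map, &sva, ssize, NULL, UVM_UNKNOWN_OFFSET,
+ *	    UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_NONE,
+ *	    UVM_ADV_RANDOM, UVM_FLAG_NOMERGE)) == KERN_SUCCESS)
+ *		(void) uvm_map_submap(kernel_map, sva, sva + ssize,
+ *		    mysubmap);
+ */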
+
+
+/*
+ * uvm_map_protect: change map protection
+ *
+ * => set_max means set max_protection.
+ * => map must be unlocked.
+ * => XXXCDC: does not work properly with share maps. rethink.
+ */
+
+#define MASK(entry) ( UVM_ET_ISCOPYONWRITE(entry) ? \
+ ~VM_PROT_WRITE : VM_PROT_ALL)
+#define max(a,b) ((a) > (b) ? (a) : (b))
+
+int
+uvm_map_protect(map, start, end, new_prot, set_max)
+ vm_map_t map;
+ vaddr_t start, end;
+ vm_prot_t new_prot;
+ boolean_t set_max;
+{
+ vm_map_entry_t current, entry;
+ UVMHIST_FUNC("uvm_map_protect"); UVMHIST_CALLED(maphist);
+ UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_prot=0x%x)",
+ map, start, end, new_prot);
+
+ vm_map_lock(map);
+
+ VM_MAP_RANGE_CHECK(map, start, end);
+
+ if (uvm_map_lookup_entry(map, start, &entry)) {
+ UVM_MAP_CLIP_START(map, entry, start);
+ } else {
+ entry = entry->next;
+ }
+
+ /*
+ * make a first pass to check for protection violations.
+ */
+
+ current = entry;
+ while ((current != &map->header) && (current->start < end)) {
+		if (UVM_ET_ISSUBMAP(current)) {
+			vm_map_unlock(map);
+			return(KERN_INVALID_ARGUMENT);
+		}
+ if ((new_prot & current->max_protection) != new_prot) {
+ vm_map_unlock(map);
+ return(KERN_PROTECTION_FAILURE);
+ }
+ current = current->next;
+ }
+
+ /* go back and fix up protections (no need to clip this time). */
+
+ current = entry;
+
+ while ((current != &map->header) && (current->start < end)) {
+ vm_prot_t old_prot;
+
+ UVM_MAP_CLIP_END(map, current, end);
+
+ old_prot = current->protection;
+ if (set_max)
+ current->protection =
+ (current->max_protection = new_prot) & old_prot;
+ else
+ current->protection = new_prot;
+
+ /*
+ * update physical map if necessary. worry about copy-on-write
+ * here -- CHECK THIS XXX
+ */
+
+ if (current->protection != old_prot) {
+
+ /* update pmap! */
+ pmap_protect(map->pmap, current->start, current->end,
+ current->protection & MASK(entry));
+
+ }
+ current = current->next;
+ }
+
+ vm_map_unlock(map);
+ UVMHIST_LOG(maphist, "<- done",0,0,0,0);
+ return(KERN_SUCCESS);
+}
+
+#undef max
+#undef MASK
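+
+/*
+ * example (an illustrative sketch): an mprotect-style caller dropping
+ * write permission on a range of a process' map without changing
+ * max_protection:
+ *
+ *	error = uvm_map_protect(&p->p_vmspace->vm_map, start, end,
+ *	    VM_PROT_READ, FALSE);
+ */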
+
+/*
+ * uvm_map_inherit: set inheritance code for range of addrs in map.
+ *
+ * => map must be unlocked
+ * => note that the inherit code is used during a "fork". see fork
+ * code for details.
+ * => XXXCDC: currently only works in main map. what about share map?
+ */
+
+int
+uvm_map_inherit(map, start, end, new_inheritance)
+ vm_map_t map;
+ vaddr_t start;
+ vaddr_t end;
+ vm_inherit_t new_inheritance;
+{
+ vm_map_entry_t entry, temp_entry;
+ UVMHIST_FUNC("uvm_map_inherit"); UVMHIST_CALLED(maphist);
+ UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_inh=0x%x)",
+ map, start, end, new_inheritance);
+
+ switch (new_inheritance) {
+ case VM_INHERIT_NONE:
+ case VM_INHERIT_COPY:
+ case VM_INHERIT_SHARE:
+ break;
+ default:
+ UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0);
+ return(KERN_INVALID_ARGUMENT);
+ }
+
+ vm_map_lock(map);
+
+ VM_MAP_RANGE_CHECK(map, start, end);
+
+ if (uvm_map_lookup_entry(map, start, &temp_entry)) {
+ entry = temp_entry;
+ UVM_MAP_CLIP_START(map, entry, start);
+ } else {
+ entry = temp_entry->next;
+ }
+
+ while ((entry != &map->header) && (entry->start < end)) {
+ UVM_MAP_CLIP_END(map, entry, end);
+
+ entry->inheritance = new_inheritance;
+
+ entry = entry->next;
+ }
+
+ vm_map_unlock(map);
+ UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0);
+ return(KERN_SUCCESS);
+}
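+
+/*
+ * example (an illustrative sketch): a minherit-style caller marking a
+ * range as shared with future children:
+ *
+ *	error = uvm_map_inherit(&p->p_vmspace->vm_map, start, end,
+ *	    VM_INHERIT_SHARE);
+ */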
+
+/*
+ * uvm_map_pageable: sets the pageability of a range in a map.
+ *
+ * => regions specified as not pageable require lock-down (wired) memory
+ * and page tables.
+ * => map must not be locked.
+ * => XXXCDC: check this and try and clean it up.
+ */
+
+int
+uvm_map_pageable(map, start, end, new_pageable)
+ vm_map_t map;
+ vaddr_t start, end;
+ boolean_t new_pageable;
+{
+ vm_map_entry_t entry, start_entry;
+ vaddr_t failed = 0;
+ int rv;
+ UVMHIST_FUNC("uvm_map_pageable"); UVMHIST_CALLED(maphist);
+ UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_pageable=0x%x)",
+ map, start, end, new_pageable);
+
+ vm_map_lock(map);
+ VM_MAP_RANGE_CHECK(map, start, end);
+
+ /*
+ * only one pageability change may take place at one time, since
+ * uvm_fault_wire assumes it will be called only once for each
+ * wiring/unwiring. therefore, we have to make sure we're actually
+ * changing the pageability for the entire region. we do so before
+ * making any changes.
+ */
+
+ if (uvm_map_lookup_entry(map, start, &start_entry) == FALSE) {
+ vm_map_unlock(map);
+
+ UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0);
+ return (KERN_INVALID_ADDRESS);
+ }
+ entry = start_entry;
+
+ /*
+	 * handle wiring and unwiring separately.
+ */
+
+ if (new_pageable) { /* unwire */
+
+ UVM_MAP_CLIP_START(map, entry, start);
+
+ /*
+ * unwiring. first ensure that the range to be unwired is
+ * really wired down and that there are no holes.
+ */
+ while ((entry != &map->header) && (entry->start < end)) {
+
+ if (entry->wired_count == 0 ||
+ (entry->end < end &&
+ (entry->next == &map->header ||
+ entry->next->start > entry->end))) {
+ vm_map_unlock(map);
+ UVMHIST_LOG(maphist,
+ "<- done (INVALID UNWIRE ARG)",0,0,0,0);
+ return (KERN_INVALID_ARGUMENT);
+ }
+ entry = entry->next;
+ }
+
+ /*
+ * now decrement the wiring count for each region. if a region
+ * becomes completely unwired, unwire its physical pages and
+ * mappings.
+ */
+#if 0 /* not necessary: uvm_fault_unwire does not lock */
+ lock_set_recursive(&map->lock);
+#endif /* XXXCDC */
+
+ entry = start_entry;
+ while ((entry != &map->header) && (entry->start < end)) {
+ UVM_MAP_CLIP_END(map, entry, end);
+
+ entry->wired_count--;
+ if (entry->wired_count == 0)
+ uvm_map_entry_unwire(map, entry);
+
+ entry = entry->next;
+ }
+#if 0 /* XXXCDC: not necessary, see above */
+ lock_clear_recursive(&map->lock);
+#endif
+ vm_map_unlock(map);
+ UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0);
+ return(KERN_SUCCESS);
+
+ /*
+ * end of unwire case!
+ */
+ }
+
+ /*
+ * wire case: in two passes [XXXCDC: ugly block of code here]
+ *
+ * 1: holding the write lock, we create any anonymous maps that need
+ * to be created. then we clip each map entry to the region to
+ * be wired and increment its wiring count.
+ *
+ * 2: we downgrade to a read lock, and call uvm_fault_wire to fault
+ * in the pages for any newly wired area (wired_count is 1).
+ *
+ * downgrading to a read lock for uvm_fault_wire avoids a possible
+ * deadlock with another thread that may have faulted on one of
+ * the pages to be wired (it would mark the page busy, blocking
+ * us, then in turn block on the map lock that we hold). because
+ * of problems in the recursive lock package, we cannot upgrade
+ * to a write lock in vm_map_lookup. thus, any actions that
+ * require the write lock must be done beforehand. because we
+ * keep the read lock on the map, the copy-on-write status of the
+ * entries we modify here cannot change.
+ */
+
+ while ((entry != &map->header) && (entry->start < end)) {
+
+ if (entry->wired_count == 0) { /* not already wired? */
+
+ /*
+ * perform actions of vm_map_lookup that need the
+ * write lock on the map: create an anonymous map
+ * for a copy-on-write region, or an anonymous map
+ * for a zero-fill region. (XXXCDC: submap case
+ * ok?)
+ */
+
+ if (!UVM_ET_ISSUBMAP(entry)) { /* not submap */
+ /*
+ * XXXCDC: protection vs. max_protection??
+ * (wirefault uses max?)
+ * XXXCDC: used to do it always if
+ * uvm_obj == NULL (wrong?)
+ */
+ if ( UVM_ET_ISNEEDSCOPY(entry) &&
+ (entry->protection & VM_PROT_WRITE) != 0) {
+ amap_copy(map, entry, M_WAITOK, TRUE,
+ start, end);
+ /* XXXCDC: wait OK? */
+ }
+ }
+ } /* wired_count == 0 */
+ UVM_MAP_CLIP_START(map, entry, start);
+ UVM_MAP_CLIP_END(map, entry, end);
+ entry->wired_count++;
+
+ /*
+ * Check for holes
+ */
+ if (entry->end < end && (entry->next == &map->header ||
+ entry->next->start > entry->end)) {
+ /*
+ * found one. amap creation actions do not need to
+ * be undone, but the wired counts need to be restored.
+ */
+ while (entry != &map->header && entry->end > start) {
+ entry->wired_count--;
+ entry = entry->prev;
+ }
+ vm_map_unlock(map);
+ UVMHIST_LOG(maphist,"<- done (INVALID WIRE)",0,0,0,0);
+ return(KERN_INVALID_ARGUMENT);
+ }
+ entry = entry->next;
+ }
+
+ /*
+ * Pass 2.
+ */
+ /*
+ * HACK HACK HACK HACK
+ *
+ * if we are wiring in the kernel map or a submap of it, unlock the
+ * map to avoid deadlocks. we trust that the kernel threads are
+ * well-behaved, and therefore will not do anything destructive to
+ * this region of the map while we have it unlocked. we cannot
+ * trust user threads to do the same.
+ *
+ * HACK HACK HACK HACK
+ */
+ if (vm_map_pmap(map) == pmap_kernel()) {
+ vm_map_unlock(map); /* trust me ... */
+ } else {
+ vm_map_set_recursive(&map->lock);
+ lockmgr(&map->lock, LK_DOWNGRADE, (void *)0, curproc /*XXX*/);
+ }
+
+ rv = 0;
+ entry = start_entry;
+ while (entry != &map->header && entry->start < end) {
+ /*
+ * if uvm_fault_wire fails for any page we need to undo what has
+ * been done. we decrement the wiring count for those pages
+ * which have not yet been wired (now) and unwire those that
+		 * have been wired (later).
+ *
+ * XXX this violates the locking protocol on the map, needs to
+ * be fixed. [because we only have a read lock on map we
+ * shouldn't be changing wired_count?]
+ */
+ if (rv) {
+ entry->wired_count--;
+ } else if (entry->wired_count == 1) {
+ rv = uvm_fault_wire(map, entry->start, entry->end);
+ if (rv) {
+ failed = entry->start;
+ entry->wired_count--;
+ }
+ }
+ entry = entry->next;
+ }
+
+ if (vm_map_pmap(map) == pmap_kernel()) {
+ vm_map_lock(map); /* relock */
+ } else {
+ vm_map_clear_recursive(&map->lock);
+ }
+
+ if (rv) { /* failed? */
+ vm_map_unlock(map);
+ (void) uvm_map_pageable(map, start, failed, TRUE);
+ UVMHIST_LOG(maphist, "<- done (RV=%d)", rv,0,0,0);
+ return(rv);
+ }
+ vm_map_unlock(map);
+
+ UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0);
+ return(KERN_SUCCESS);
+}
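+
+/*
+ * example (an illustrative sketch; "start"/"end" assumed page aligned):
+ * wire a range down, mlock-style, and release it again later:
+ *
+ *	if (uvm_map_pageable(map, start, end, FALSE) != KERN_SUCCESS)
+ *		return(ENOMEM);
+ *	...
+ *	(void) uvm_map_pageable(map, start, end, TRUE);
+ */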
+
+/*
+ * uvm_map_clean: push dirty pages off to backing store.
+ *
+ * => valid flags:
+ * if (flags & PGO_SYNCIO): dirty pages are written synchronously
+ * if (flags & PGO_DEACTIVATE): any cached pages are deactivated after clean
+ * if (flags & PGO_FREE): any cached pages are freed after clean
+ * => returns an error if any part of the specified range isn't mapped
+ * => never a need to flush amap layer since the anonymous memory has
+ * no permanent home...
+ * => called from sys_msync()
+ * => caller must not write-lock map (read OK).
+ * => we may sleep while cleaning if SYNCIO [with map read-locked]
+ * => XXX: does this handle share maps properly?
+ */
+
+int
+uvm_map_clean(map, start, end, flags)
+ vm_map_t map;
+ vaddr_t start, end;
+ int flags;
+{
+ vm_map_entry_t current;
+ vm_map_entry_t entry;
+ vsize_t size;
+ struct uvm_object *object;
+ vaddr_t offset;
+ UVMHIST_FUNC("uvm_map_clean"); UVMHIST_CALLED(maphist);
+ UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,flags=0x%x)",
+ map, start, end, flags);
+
+ vm_map_lock_read(map);
+ VM_MAP_RANGE_CHECK(map, start, end);
+ if (!uvm_map_lookup_entry(map, start, &entry)) {
+ vm_map_unlock_read(map);
+ return(KERN_INVALID_ADDRESS);
+ }
+
+ /*
+ * Make a first pass to check for holes.
+ */
+ for (current = entry; current->start < end; current = current->next) {
+ if (UVM_ET_ISSUBMAP(current)) {
+ vm_map_unlock_read(map);
+ return(KERN_INVALID_ARGUMENT);
+ }
+ if (end > current->end && (current->next == &map->header ||
+ current->end != current->next->start)) {
+ vm_map_unlock_read(map);
+ return(KERN_INVALID_ADDRESS);
+ }
+ }
+
+ /*
+ * add "cleanit" flag to flags (for generic flush routine).
+ * then make a second pass, cleaning/uncaching pages from
+ * the indicated objects as we go.
+ */
+ flags = flags | PGO_CLEANIT;
+ for (current = entry; current->start < end; current = current->next) {
+ offset = current->offset + (start - current->start);
+ size = (end <= current->end ? end : current->end) - start;
+
+ /*
+ * get object/offset. can't be submap (checked above).
+ */
+ object = current->object.uvm_obj;
+ simple_lock(&object->vmobjlock);
+
+ /*
+ * flush pages if we've got a valid backing object.
+ * note that object is locked.
+ * XXX should we continue on an error?
+ */
+
+ if (object && object->pgops) {
+ if (!object->pgops->pgo_flush(object, offset,
+ offset+size, flags)) {
+ simple_unlock(&object->vmobjlock);
+ vm_map_unlock_read(map);
+ return (KERN_FAILURE);
+ }
+ }
+ simple_unlock(&object->vmobjlock);
+ start += size;
+ }
+ vm_map_unlock_read(map);
+ return(KERN_SUCCESS);
+}
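+
+/*
+ * example (an illustrative sketch): an msync-style caller writing the
+ * dirty pages out synchronously and then freeing the cached copies:
+ *
+ *	error = uvm_map_clean(&p->p_vmspace->vm_map, start, end,
+ *	    PGO_SYNCIO | PGO_FREE);
+ */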
+
+
+/*
+ * uvm_map_checkprot: check protection in map
+ *
+ * => must allow specified protection in a fully allocated region.
+ * => map must be read or write locked by caller.
+ */
+
+boolean_t
+uvm_map_checkprot(map, start, end, protection)
+ vm_map_t map;
+ vaddr_t start, end;
+ vm_prot_t protection;
+{
+ vm_map_entry_t entry;
+ vm_map_entry_t tmp_entry;
+
+ if (!uvm_map_lookup_entry(map, start, &tmp_entry)) {
+ return(FALSE);
+ }
+
+ entry = tmp_entry;
+
+ while (start < end) {
+ if (entry == &map->header) {
+ return(FALSE);
+ }
+
+ /*
+ * no holes allowed
+ */
+
+ if (start < entry->start) {
+ return(FALSE);
+ }
+
+ /*
+ * check protection associated with entry
+ */
+
+ if ((entry->protection & protection) != protection) {
+ return(FALSE);
+ }
+
+ /* go to next entry */
+
+ start = entry->end;
+ entry = entry->next;
+ }
+ return(TRUE);
+}
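+
+/*
+ * example (an illustrative sketch; "ok" is a hypothetical boolean_t):
+ * verify that a fully mapped range allows reading before operating on
+ * it (the caller must already hold a map lock, per the rules above):
+ *
+ *	vm_map_lock_read(map);
+ *	ok = uvm_map_checkprot(map, start, end, VM_PROT_READ);
+ *	vm_map_unlock_read(map);
+ */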
+
+/*
+ * uvmspace_alloc: allocate a vmspace structure.
+ *
+ * - structure includes vm_map and pmap
+ * - XXX: no locking on this structure
+ * - refcnt set to 1, rest must be init'd by caller
+ */
+struct vmspace *
+uvmspace_alloc(min, max, pageable)
+ vaddr_t min, max;
+ int pageable;
+{
+ struct vmspace *vm;
+ UVMHIST_FUNC("uvmspace_alloc"); UVMHIST_CALLED(maphist);
+
+ vm = pool_get(&uvm_vmspace_pool, PR_WAITOK);
+ uvmspace_init(vm, NULL, min, max, pageable);
+ UVMHIST_LOG(maphist,"<- done (vm=0x%x)", vm,0,0,0);
+ return (vm);
+}
+
+/*
+ * uvmspace_init: initialize a vmspace structure.
+ *
+ * - XXX: no locking on this structure
+ * - refcnt set to 1, rest must be init'd by caller
+ */
+void
+uvmspace_init(vm, pmap, min, max, pageable)
+ struct vmspace *vm;
+ struct pmap *pmap;
+ vaddr_t min, max;
+ boolean_t pageable;
+{
+ UVMHIST_FUNC("uvmspace_init"); UVMHIST_CALLED(maphist);
+
+ bzero(vm, sizeof(*vm));
+
+ uvm_map_setup(&vm->vm_map, min, max, pageable);
+
+ if (pmap)
+ pmap_reference(pmap);
+ else
+#if defined(PMAP_NEW)
+ pmap = pmap_create();
+#else
+ pmap = pmap_create(0);
+#endif
+ vm->vm_map.pmap = pmap;
+
+ vm->vm_refcnt = 1;
+ UVMHIST_LOG(maphist,"<- done",0,0,0,0);
+}
+
+/*
+ * uvmspace_share: share a vmspace between two processes
+ *
+ * - XXX: no locking on vmspace
+ * - used for vfork, threads(?)
+ */
+
+void
+uvmspace_share(p1, p2)
+ struct proc *p1, *p2;
+{
+ p2->p_vmspace = p1->p_vmspace;
+ p1->p_vmspace->vm_refcnt++;
+}
+
+/*
+ * uvmspace_unshare: ensure that process "p" has its own, unshared, vmspace
+ *
+ * - XXX: no locking on vmspace
+ */
+
+void
+uvmspace_unshare(p)
+ struct proc *p;
+{
+ struct vmspace *nvm, *ovm = p->p_vmspace;
+ int s;
+
+ if (ovm->vm_refcnt == 1)
+ /* nothing to do: vmspace isn't shared in the first place */
+ return;
+
+ /* make a new vmspace, still holding old one */
+ nvm = uvmspace_fork(ovm);
+
+ s = splhigh(); /* make this `atomic' */
+ pmap_deactivate(p);
+ /* unbind old vmspace */
+ p->p_vmspace = nvm;
+ pmap_activate(p);
+ /* switch to new vmspace */
+ splx(s); /* end of critical section */
+
+ uvmspace_free(ovm); /* drop reference to old vmspace */
+}
+
+/*
+ * uvmspace_exec: the process wants to exec a new program
+ *
+ * - XXX: no locking on vmspace
+ */
+
+void
+uvmspace_exec(p)
+ struct proc *p;
+{
+ struct vmspace *nvm, *ovm = p->p_vmspace;
+ vm_map_t map = &ovm->vm_map;
+ int s;
+
+#ifdef sparc
+ /* XXX cgd 960926: the sparc #ifdef should be a MD hook */
+ kill_user_windows(p); /* before stack addresses go away */
+#endif
+
+ /*
+ * see if more than one process is using this vmspace...
+ */
+
+ if (ovm->vm_refcnt == 1) {
+
+ /*
+ * if p is the only process using its vmspace then we can safely
+ * recycle that vmspace for the program that is being exec'd.
+ */
+
+#ifdef SYSVSHM
+ /*
+ * SYSV SHM semantics require us to kill all segments on an exec
+ */
+ if (ovm->vm_shm)
+ shmexit(ovm);
+#endif
+
+ /*
+ * now unmap the old program
+ */
+ uvm_unmap(map, VM_MIN_ADDRESS, VM_MAXUSER_ADDRESS);
+
+ } else {
+
+ /*
+ * p's vmspace is being shared, so we can't reuse it for p since
+ * it is still being used for others. allocate a new vmspace
+ * for p
+ */
+ nvm = uvmspace_alloc(map->min_offset, map->max_offset,
+ map->entries_pageable);
+
+#if (defined(i386) || defined(pc532)) && !defined(PMAP_NEW)
+ /*
+ * allocate zero fill area in the new vmspace's map for user
+ * page tables for ports that have old style pmaps that keep
+ * user page tables in the top part of the process' address
+ * space.
+ *
+ * XXXCDC: this should go away once all pmaps are fixed
+ */
+ {
+ vaddr_t addr = VM_MAXUSER_ADDRESS;
+ if (uvm_map(&nvm->vm_map, &addr, VM_MAX_ADDRESS - addr,
+ NULL, UVM_UNKNOWN_OFFSET, UVM_MAPFLAG(UVM_PROT_ALL,
+ UVM_PROT_ALL, UVM_INH_NONE, UVM_ADV_NORMAL,
+ UVM_FLAG_FIXED|UVM_FLAG_COPYONW)) != KERN_SUCCESS)
+ panic("vm_allocate of PT page area failed");
+ }
+#endif
+
+ /*
+ * install new vmspace and drop our ref to the old one.
+ */
+
+ s = splhigh();
+ pmap_deactivate(p);
+ p->p_vmspace = nvm;
+ pmap_activate(p);
+ splx(s);
+
+ uvmspace_free(ovm);
+ }
+}
+
+/*
+ * uvmspace_free: free a vmspace data structure
+ *
+ * - XXX: no locking on vmspace
+ */
+
+void
+uvmspace_free(vm)
+ struct vmspace *vm;
+{
+ vm_map_entry_t dead_entries;
+ UVMHIST_FUNC("uvmspace_free"); UVMHIST_CALLED(maphist);
+
+ UVMHIST_LOG(maphist,"(vm=0x%x) ref=%d", vm, vm->vm_refcnt,0,0);
+ if (--vm->vm_refcnt == 0) {
+ /*
+ * lock the map, to wait out all other references to it. delete
+ * all of the mappings and pages they hold, then call the pmap
+ * module to reclaim anything left.
+ */
+ vm_map_lock(&vm->vm_map);
+ if (vm->vm_map.nentries) {
+ (void)uvm_unmap_remove(&vm->vm_map,
+ vm->vm_map.min_offset, vm->vm_map.max_offset,
+ &dead_entries);
+ if (dead_entries != NULL)
+ uvm_unmap_detach(dead_entries, 0);
+ }
+ pmap_destroy(vm->vm_map.pmap);
+ vm->vm_map.pmap = NULL;
+ pool_put(&uvm_vmspace_pool, vm);
+ }
+ UVMHIST_LOG(maphist,"<- done", 0,0,0,0);
+}
+
+/*
+ * F O R K - m a i n e n t r y p o i n t
+ */
+/*
+ * uvmspace_fork: fork a process' main map
+ *
+ * => create a new vmspace for child process from parent.
+ * => parent's map must not be locked.
+ */
+
+struct vmspace *
+uvmspace_fork(vm1)
+ struct vmspace *vm1;
+{
+ struct vmspace *vm2;
+ vm_map_t old_map = &vm1->vm_map;
+ vm_map_t new_map;
+ vm_map_entry_t old_entry;
+ vm_map_entry_t new_entry;
+ pmap_t new_pmap;
+ boolean_t protect_child;
+ UVMHIST_FUNC("uvmspace_fork"); UVMHIST_CALLED(maphist);
+
+#if (defined(i386) || defined(pc532)) && !defined(PMAP_NEW)
+ /*
+ * avoid copying any of the parent's pagetables or other per-process
+ * objects that reside in the map by marking all of them non-inheritable
+ * XXXCDC: should go away
+ */
+ (void) uvm_map_inherit(old_map, VM_MAXUSER_ADDRESS, VM_MAX_ADDRESS,
+ VM_INHERIT_NONE);
+#endif
+
+ vm_map_lock(old_map);
+
+ vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset,
+ old_map->entries_pageable);
+ bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
+ (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
+ new_map = &vm2->vm_map; /* XXX */
+ new_pmap = new_map->pmap;
+
+ old_entry = old_map->header.next;
+
+ /*
+ * go entry-by-entry
+ */
+
+ while (old_entry != &old_map->header) {
+
+ /*
+ * first, some sanity checks on the old entry
+ */
+ if (UVM_ET_ISSUBMAP(old_entry))
+ panic("fork: encountered a submap during fork (illegal)");
+
+ if (!UVM_ET_ISCOPYONWRITE(old_entry) &&
+ UVM_ET_ISNEEDSCOPY(old_entry))
+ panic("fork: non-copy_on_write map entry marked needs_copy (illegal)");
+
+
+ switch (old_entry->inheritance) {
+ case VM_INHERIT_NONE:
+ /*
+ * drop the mapping
+ */
+ break;
+
+ case VM_INHERIT_SHARE:
+ /*
+ * share the mapping: this means we want the old and
+ * new entries to share amaps and backing objects.
+ */
+
+ /*
+ * if the old_entry needs a new amap (due to prev fork)
+ * then we need to allocate it now so that we have
+ * something we own to share with the new_entry. [in
+ * other words, we need to clear needs_copy]
+ */
+
+ if (UVM_ET_ISNEEDSCOPY(old_entry)) {
+ /* get our own amap, clears needs_copy */
+ amap_copy(old_map, old_entry, M_WAITOK, FALSE,
+ 0, 0);
+ /* XXXCDC: WAITOK??? */
+ }
+
+ new_entry = uvm_mapent_alloc(new_map);
+ /* old_entry -> new_entry */
+ uvm_mapent_copy(old_entry, new_entry);
+
+ /* new pmap has nothing wired in it */
+ new_entry->wired_count = 0;
+
+ /*
+ * gain reference to object backing the map (can't
+ * be a submap, already checked this case).
+ */
+ if (new_entry->aref.ar_amap)
+ /* share reference */
+ amap_ref(new_entry, AMAP_SHARED);
+
+ if (new_entry->object.uvm_obj &&
+ new_entry->object.uvm_obj->pgops->pgo_reference)
+ new_entry->object.uvm_obj->
+ pgops->pgo_reference(
+ new_entry->object.uvm_obj);
+
+ /* insert entry at end of new_map's entry list */
+ uvm_map_entry_link(new_map, new_map->header.prev,
+ new_entry);
+
+ /*
+ * pmap_copy the mappings: this routine is optional
+ * but if it is there it will reduce the number of
+ * page faults in the new proc.
+ */
+
+ pmap_copy(new_pmap, old_map->pmap, new_entry->start,
+ (old_entry->end - old_entry->start),
+ old_entry->start);
+
+ break;
+
+ case VM_INHERIT_COPY:
+
+ /*
+ * copy-on-write the mapping (using mmap's
+ * MAP_PRIVATE semantics)
+ *
+ * allocate new_entry, adjust reference counts.
+ * (note that new references are read-only).
+ */
+
+ new_entry = uvm_mapent_alloc(new_map);
+ /* old_entry -> new_entry */
+ uvm_mapent_copy(old_entry, new_entry);
+
+ if (new_entry->aref.ar_amap)
+ amap_ref(new_entry, 0);
+
+ if (new_entry->object.uvm_obj &&
+ new_entry->object.uvm_obj->pgops->pgo_reference)
+ new_entry->object.uvm_obj->pgops->pgo_reference
+ (new_entry->object.uvm_obj);
+
+ /* new pmap has nothing wired in it */
+ new_entry->wired_count = 0;
+
+ new_entry->etype |=
+ (UVM_ET_COPYONWRITE|UVM_ET_NEEDSCOPY);
+ uvm_map_entry_link(new_map, new_map->header.prev,
+ new_entry);
+
+ /*
+ * the new entry will need an amap. it will either
+ * need to be copied from the old entry or created
+ * from scratch (if the old entry does not have an
+ * amap). can we defer this process until later
+ * (by setting "needs_copy") or do we need to copy
+ * the amap now?
+ *
+ * we must copy the amap now if any of the following
+ * conditions hold:
+ * 1. the old entry has an amap and that amap is
+ * being shared. this means that the old (parent)
+ * process is sharing the amap with another
+ * process. if we do not clear needs_copy here
+ * we will end up in a situation where both the
+			 * parent and child process are referring to the
+ * same amap with "needs_copy" set. if the
+ * parent write-faults, the fault routine will
+ * clear "needs_copy" in the parent by allocating
+ * a new amap. this is wrong because the
+ * parent is supposed to be sharing the old amap
+ * and the new amap will break that.
+ *
+ * 2. if the old entry has an amap and a non-zero
+ * wire count then we are going to have to call
+ * amap_cow_now to avoid page faults in the
+ * parent process. since amap_cow_now requires
+ * "needs_copy" to be clear we might as well
+ * clear it here as well.
+ *
+ */
+
+ if (old_entry->aref.ar_amap != NULL) {
+
+ if ((amap_flags(old_entry->aref.ar_amap) &
+ AMAP_SHARED) != 0 ||
+ old_entry->wired_count != 0) {
+
+ amap_copy(new_map, new_entry, M_WAITOK, FALSE,
+ 0, 0);
+ /* XXXCDC: M_WAITOK ... ok? */
+ }
+ }
+
+ /*
+ * if the parent's entry is wired down, then the
+ * parent process does not want page faults on
+ * access to that memory. this means that we
+ * cannot do copy-on-write because we can't write
+ * protect the old entry. in this case we
+ * resolve all copy-on-write faults now, using
+ * amap_cow_now. note that we have already
+ * allocated any needed amap (above).
+ */
+
+ if (old_entry->wired_count != 0) {
+
+ /*
+ * resolve all copy-on-write faults now
+ * (note that there is nothing to do if
+ * the old mapping does not have an amap).
+ * XXX: is it worthwhile to bother with pmap_copy
+ * in this case?
+ */
+ if (old_entry->aref.ar_amap)
+ amap_cow_now(new_map, new_entry);
+
+ } else {
+
+ /*
+ * setup mappings to trigger copy-on-write faults
+ * we must write-protect the parent if it has
+ * an amap and it is not already "needs_copy"...
+ * if it is already "needs_copy" then the parent
+ * has already been write-protected by a previous
+ * fork operation.
+ *
+ * if we do not write-protect the parent, then
+ * we must be sure to write-protect the child
+ * after the pmap_copy() operation.
+ *
+ * XXX: pmap_copy should have some way of telling
+ * us that it didn't do anything so we can avoid
+ * calling pmap_protect needlessly.
+ */
+
+ if (old_entry->aref.ar_amap) {
+
+ if (!UVM_ET_ISNEEDSCOPY(old_entry)) {
+ if (old_entry->max_protection & VM_PROT_WRITE) {
+ pmap_protect(old_map->pmap,
+ old_entry->start,
+ old_entry->end,
+ old_entry->protection &
+ ~VM_PROT_WRITE);
+ }
+ old_entry->etype |= UVM_ET_NEEDSCOPY;
+ }
+
+ /*
+ * parent must now be write-protected
+ */
+ protect_child = FALSE;
+ } else {
+
+ /*
+ * we only need to protect the child if the
+ * parent has write access.
+ */
+ if (old_entry->max_protection & VM_PROT_WRITE)
+ protect_child = TRUE;
+ else
+ protect_child = FALSE;
+
+ }
+
+ /*
+ * copy the mappings
+ * XXX: need a way to tell if this does anything
+ */
+
+ pmap_copy(new_pmap, old_map->pmap,
+ new_entry->start,
+ (old_entry->end - old_entry->start),
+ old_entry->start);
+
+ /*
+ * protect the child's mappings if necessary
+ */
+ if (protect_child) {
+ pmap_protect(new_pmap, new_entry->start,
+ new_entry->end,
+ new_entry->protection &
+ ~VM_PROT_WRITE);
+ }
+
+ }
+ break;
+ } /* end of switch statement */
+ old_entry = old_entry->next;
+ }
+
+ new_map->size = old_map->size;
+ vm_map_unlock(old_map);
+
+#if (defined(i386) || defined(pc532)) && !defined(PMAP_NEW)
+ /*
+ * allocate zero fill area in the new vmspace's map for user
+ * page tables for ports that have old style pmaps that keep
+ * user page tables in the top part of the process' address
+ * space.
+ *
+ * XXXCDC: this should go away once all pmaps are fixed
+ */
+ {
+ vaddr_t addr = VM_MAXUSER_ADDRESS;
+ if (uvm_map(new_map, &addr, VM_MAX_ADDRESS - addr, NULL,
+ UVM_UNKNOWN_OFFSET, UVM_MAPFLAG(UVM_PROT_ALL,
+ UVM_PROT_ALL, UVM_INH_NONE, UVM_ADV_NORMAL,
+ UVM_FLAG_FIXED|UVM_FLAG_COPYONW)) != KERN_SUCCESS)
+ panic("vm_allocate of PT page area failed");
+ }
+#endif
+
+#ifdef SYSVSHM
+ if (vm1->vm_shm)
+ shmfork(vm1, vm2);
+#endif
+
+ UVMHIST_LOG(maphist,"<- done",0,0,0,0);
+ return(vm2);
+}
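A brief editorial recap of the copy-on-write bookkeeping above: wired entries have their copy-on-write faults resolved on the spot with amap_cow_now(), because the parent is never allowed to fault on that memory; unwired entries that already carry an amap get the parent write-protected and marked UVM_ET_NEEDSCOPY (unless it already was) before pmap_copy(); unwired entries without an amap are pmap_copy()'d and the child is then write-protected, but only if the parent had write access in the first place.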
+
+
+#if defined(DDB)
+
+/*
+ * DDB hooks
+ */
+
+/*
+ * uvm_map_print: print out a map
+ */
+
+void
+uvm_map_print(map, full)
+ vm_map_t map;
+ boolean_t full;
+{
+
+ uvm_map_printit(map, full, printf);
+}
+
+/*
+ * uvm_map_printit: actually prints the map
+ */
+
+void
+uvm_map_printit(map, full, pr)
+ vm_map_t map;
+ boolean_t full;
+ int (*pr) __P((const char *, ...));
+{
+ vm_map_entry_t entry;
+
+ (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset);
+ (*pr)("\t#ent=%d, sz=%d, ref=%d, version=%d\n",
+ map->nentries, map->size, map->ref_count, map->timestamp);
+#ifdef pmap_resident_count
+ (*pr)("\tpmap=%p(resident=%d)\n", map->pmap,
+ pmap_resident_count(map->pmap));
+#else
+ /* XXXCDC: this should be required ... */
+ (*pr)("\tpmap=%p(resident=<<NOT SUPPORTED!!!>>)\n", map->pmap);
+#endif
+ if (!full)
+ return;
+ for (entry = map->header.next; entry != &map->header;
+ entry = entry->next) {
+ (*pr)(" - %p: 0x%lx->0x%lx: obj=%p/0x%x, amap=%p/%d\n",
+ entry, entry->start, entry->end, entry->object.uvm_obj,
+ entry->offset, entry->aref.ar_amap, entry->aref.ar_pageoff);
+ (*pr)(
+"\tsubmap=%c, cow=%c, nc=%c, prot(max)=%d/%d, inh=%d, wc=%d, adv=%d\n",
+ (entry->etype & UVM_ET_SUBMAP) ? 'T' : 'F',
+ (entry->etype & UVM_ET_COPYONWRITE) ? 'T' : 'F',
+ (entry->etype & UVM_ET_NEEDSCOPY) ? 'T' : 'F',
+ entry->protection, entry->max_protection,
+ entry->inheritance, entry->wired_count, entry->advice);
+ }
+}
+
+/*
+ * uvm_object_print: print out an object
+ */
+
+void
+uvm_object_print(uobj, full)
+ struct uvm_object *uobj;
+ boolean_t full;
+{
+
+ uvm_object_printit(uobj, full, printf);
+}
+
+/*
+ * uvm_object_printit: actually prints the object
+ */
+
+void
+uvm_object_printit(uobj, full, pr)
+ struct uvm_object *uobj;
+ boolean_t full;
+ int (*pr) __P((const char *, ...));
+{
+ struct vm_page *pg;
+ int cnt = 0;
+
+ (*pr)("OBJECT %p: pgops=%p, npages=%d, ", uobj, uobj->pgops,
+ uobj->uo_npages);
+ if (uobj->uo_refs == UVM_OBJ_KERN)
+ (*pr)("refs=<SYSTEM>\n");
+ else
+ (*pr)("refs=%d\n", uobj->uo_refs);
+
+ if (!full) return;
+ (*pr)(" PAGES <pg,offset>:\n ");
+ for (pg = uobj->memq.tqh_first ; pg ; pg = pg->listq.tqe_next, cnt++) {
+ (*pr)("<%p,0x%lx> ", pg, pg->offset);
+ if ((cnt % 3) == 2) (*pr)("\n ");
+ }
+ if ((cnt % 3) != 2) (*pr)("\n");
+}
+
+/*
+ * uvm_page_print: print out a page
+ */
+
+void
+uvm_page_print(pg, full)
+ struct vm_page *pg;
+ boolean_t full;
+{
+
+ uvm_page_printit(pg, full, printf);
+}
+
+/*
+ * uvm_page_printit: actually print the page
+ */
+
+void
+uvm_page_printit(pg, full, pr)
+ struct vm_page *pg;
+ boolean_t full;
+ int (*pr) __P((const char *, ...));
+{
+ struct vm_page *lcv;
+ struct uvm_object *uobj;
+ struct pglist *pgl;
+
+ (*pr)("PAGE %p:\n", pg);
+ (*pr)(" flags=0x%x, pqflags=0x%x, vers=%d, wire_count=%d, pa=0x%lx\n",
+ pg->flags, pg->pqflags, pg->version, pg->wire_count, (long)pg->phys_addr);
+ (*pr)(" uobject=%p, uanon=%p, offset=0x%lx loan_count=%d\n",
+ pg->uobject, pg->uanon, pg->offset, pg->loan_count);
+#if defined(UVM_PAGE_TRKOWN)
+ if (pg->flags & PG_BUSY)
+ (*pr)(" owning process = %d, tag=%s\n",
+ pg->owner, pg->owner_tag);
+ else
+ (*pr)(" page not busy, no owner\n");
+#else
+ (*pr)(" [page ownership tracking disabled]\n");
+#endif
+
+ if (!full)
+ return;
+
+ /* cross-verify object/anon */
+ if ((pg->pqflags & PQ_FREE) == 0) {
+ if (pg->pqflags & PQ_ANON) {
+ if (pg->uanon == NULL || pg->uanon->u.an_page != pg)
+ (*pr)(" >>> ANON DOES NOT POINT HERE <<< (%p)\n",
+ (pg->uanon) ? pg->uanon->u.an_page : NULL);
+ else
+ (*pr)(" anon backpointer is OK\n");
+ } else {
+ uobj = pg->uobject;
+ if (uobj) {
+ (*pr)(" checking object list\n");
+ for (lcv = uobj->memq.tqh_first ; lcv ;
+ lcv = lcv->listq.tqe_next) {
+ if (lcv == pg) break;
+ }
+ if (lcv)
+ (*pr)(" page found on object list\n");
+ else
+ (*pr)(" >>> PAGE NOT FOUND ON OBJECT LIST! <<<\n");
+ }
+ }
+ }
+
+ /* cross-verify page queue */
+ if (pg->pqflags & PQ_FREE)
+ pgl = &uvm.page_free[uvm_page_lookup_freelist(pg)];
+ else if (pg->pqflags & PQ_INACTIVE)
+ pgl = (pg->pqflags & PQ_SWAPBACKED) ?
+ &uvm.page_inactive_swp : &uvm.page_inactive_obj;
+ else if (pg->pqflags & PQ_ACTIVE)
+ pgl = &uvm.page_active;
+ else
+ pgl = NULL;
+
+ if (pgl) {
+ (*pr)(" checking pageq list\n");
+ for (lcv = pgl->tqh_first ; lcv ; lcv = lcv->pageq.tqe_next) {
+ if (lcv == pg) break;
+ }
+ if (lcv)
+ (*pr)(" page found on pageq list\n");
+ else
+ (*pr)(" >>> PAGE NOT FOUND ON PAGEQ LIST! <<<\n");
+ }
+}
+#endif
diff --git a/sys/uvm/uvm_map.h b/sys/uvm/uvm_map.h
new file mode 100644
index 00000000000..4c10b5222d1
--- /dev/null
+++ b/sys/uvm/uvm_map.h
@@ -0,0 +1,166 @@
+/* $NetBSD: uvm_map.h,v 1.10 1998/10/11 23:14:48 chuck Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * Copyright (c) 1991, 1993, The Regents of the University of California.
+ *
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor,
+ * Washington University, the University of California, Berkeley and
+ * its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_map.h 8.3 (Berkeley) 3/15/94
+ * from: Id: uvm_map.h,v 1.1.2.3 1998/02/07 01:16:55 chs Exp
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+#ifndef _UVM_UVM_MAP_H_
+#define _UVM_UVM_MAP_H_
+
+/*
+ * uvm_map.h
+ */
+
+/*
+ * macros
+ */
+
+/*
+ * UVM_MAP_CLIP_START: ensure that the entry begins at or after
+ * the starting address, if it doesn't we split the entry.
+ *
+ * => map must be locked by caller
+ */
+
+#define UVM_MAP_CLIP_START(MAP,ENTRY,VA) { \
+ if ((VA) > (ENTRY)->start) uvm_map_clip_start(MAP,ENTRY,VA); }
+
+/*
+ * UVM_MAP_CLIP_END: ensure that the entry ends at or before
+ * the ending address; if it doesn't, we split the entry.
+ *
+ * => map must be locked by caller
+ */
+
+#define UVM_MAP_CLIP_END(MAP,ENTRY,VA) { \
+ if ((VA) < (ENTRY)->end) uvm_map_clip_end(MAP,ENTRY,VA); }
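As a rough usage sketch (editorial, not part of the imported file), callers that operate on an arbitrary [start, end) range first look up the entry covering start, clip its head, then clip the tail of each entry they walk:

        /* map already locked by the caller */
        if (uvm_map_lookup_entry(map, start, &entry)) {
                UVM_MAP_CLIP_START(map, entry, start);
                while (entry != &map->header && entry->start < end) {
                        UVM_MAP_CLIP_END(map, entry, end);
                        /* ... operate on [entry->start, entry->end) ... */
                        entry = entry->next;
                }
        }

This mirrors the pattern typically used by the range operations in uvm_map.c; the not-found case is omitted here.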
+
+/*
+ * extract flags
+ */
+#define UVM_EXTRACT_REMOVE 0x1 /* remove mapping from old map */
+#define UVM_EXTRACT_CONTIG 0x2 /* try to keep it contig */
+#define UVM_EXTRACT_QREF 0x4 /* use quick refs */
+#define UVM_EXTRACT_FIXPROT 0x8 /* set prot to maxprot as we go */
+
+
+/*
+ * handle inline options
+ */
+
+#ifdef UVM_MAP_INLINE
+#define MAP_INLINE static __inline
+#else
+#define MAP_INLINE /* nothing */
+#endif /* UVM_MAP_INLINE */
+
+/*
+ * protos: the following prototypes define the interface to vm_map
+ */
+
+MAP_INLINE
+void uvm_map_deallocate __P((vm_map_t));
+
+int uvm_map_clean __P((vm_map_t, vaddr_t, vaddr_t, int));
+void uvm_map_clip_start __P((vm_map_t,
+ vm_map_entry_t, vaddr_t));
+void uvm_map_clip_end __P((vm_map_t, vm_map_entry_t,
+ vaddr_t));
+MAP_INLINE
+vm_map_t uvm_map_create __P((pmap_t, vaddr_t,
+ vaddr_t, boolean_t));
+int uvm_map_extract __P((vm_map_t, vaddr_t, vsize_t,
+ vm_map_t, vaddr_t *, int));
+vm_map_entry_t uvm_map_findspace __P((vm_map_t, vaddr_t, vsize_t,
+ vaddr_t *, struct uvm_object *, vaddr_t,
+ boolean_t));
+int uvm_map_inherit __P((vm_map_t, vaddr_t, vaddr_t,
+ vm_inherit_t));
+void uvm_map_init __P((void));
+boolean_t uvm_map_lookup_entry __P((vm_map_t, vaddr_t,
+ vm_map_entry_t *));
+MAP_INLINE
+void uvm_map_reference __P((vm_map_t));
+int uvm_map_replace __P((vm_map_t, vaddr_t, vaddr_t,
+ vm_map_entry_t, int));
+int uvm_map_reserve __P((vm_map_t, vsize_t, vaddr_t,
+ vaddr_t *));
+void uvm_map_setup __P((vm_map_t, vaddr_t,
+ vaddr_t, boolean_t));
+int uvm_map_submap __P((vm_map_t, vaddr_t,
+ vaddr_t, vm_map_t));
+MAP_INLINE
+int uvm_unmap __P((vm_map_t, vaddr_t, vaddr_t));
+void uvm_unmap_detach __P((vm_map_entry_t,int));
+int uvm_unmap_remove __P((vm_map_t, vaddr_t, vaddr_t,
+ vm_map_entry_t *));
+
+struct vmspace *uvmspace_fork __P((struct vmspace *));
+
+#endif /* _UVM_UVM_MAP_H_ */
diff --git a/sys/uvm/uvm_map_i.h b/sys/uvm/uvm_map_i.h
new file mode 100644
index 00000000000..56842e191b6
--- /dev/null
+++ b/sys/uvm/uvm_map_i.h
@@ -0,0 +1,243 @@
+/* $NetBSD: uvm_map_i.h,v 1.10 1998/10/11 23:14:48 chuck Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * Copyright (c) 1991, 1993, The Regents of the University of California.
+ *
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor,
+ * Washington University, the University of California, Berkeley and
+ * its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_map.c 8.3 (Berkeley) 1/12/94
+ * from: Id: uvm_map_i.h,v 1.1.2.1 1997/08/14 19:10:50 chuck Exp
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+#ifndef _UVM_UVM_MAP_I_H_
+#define _UVM_UVM_MAP_I_H_
+
+/*
+ * uvm_map_i.h
+ */
+
+/*
+ * inline functions [maybe]
+ */
+
+#if defined(UVM_MAP_INLINE) || defined(UVM_MAP)
+
+/*
+ * uvm_map_create: create map
+ */
+
+MAP_INLINE vm_map_t
+uvm_map_create(pmap, min, max, pageable)
+ pmap_t pmap;
+ vaddr_t min, max;
+ boolean_t pageable;
+{
+ vm_map_t result;
+
+ MALLOC(result, vm_map_t, sizeof(struct vm_map), M_VMMAP, M_WAITOK);
+ uvm_map_setup(result, min, max, pageable);
+ result->pmap = pmap;
+ return(result);
+}
+
+/*
+ * uvm_map_setup: init map
+ *
+ * => map must not be in service yet.
+ */
+
+MAP_INLINE void
+uvm_map_setup(map, min, max, pageable)
+ vm_map_t map;
+ vaddr_t min, max;
+ boolean_t pageable;
+{
+
+ map->header.next = map->header.prev = &map->header;
+ map->nentries = 0;
+ map->size = 0;
+ map->ref_count = 1;
+ map->min_offset = min;
+ map->max_offset = max;
+ map->entries_pageable = pageable;
+ map->first_free = &map->header;
+ map->hint = &map->header;
+ map->timestamp = 0;
+ lockinit(&map->lock, PVM, "thrd_sleep", 0, 0);
+ simple_lock_init(&map->ref_lock);
+ simple_lock_init(&map->hint_lock);
+}
+
+
+/*
+ * U N M A P - m a i n e n t r y p o i n t
+ */
+
+/*
+ * uvm_unmap: remove mappings from a vm_map (from "start" up to "stop")
+ *
+ * => caller must check alignment and size
+ * => map must be unlocked (we will lock it)
+ * => if the "start"/"stop" range lie within a mapping of a share map,
+ * then the unmap takes place within the context of that share map
+ * rather than in the main map, unless the "mainonly" flag is set.
+ * (e.g. the "exit" system call would want to set "mainonly").
+ */
+
+MAP_INLINE int
+uvm_unmap(map, start, end)
+ vm_map_t map;
+ vaddr_t start,end;
+{
+ int result;
+ vm_map_entry_t dead_entries;
+ UVMHIST_FUNC("uvm_unmap"); UVMHIST_CALLED(maphist);
+
+ UVMHIST_LOG(maphist, " (map=0x%x, start=0x%x, end=0x%x)",
+ map, start, end, 0);
+ /*
+ * work now done by helper functions. wipe the pmap's and then
+ * detach from the dead entries...
+ */
+ vm_map_lock(map);
+ result = uvm_unmap_remove(map, start, end, &dead_entries);
+ vm_map_unlock(map);
+
+ if (dead_entries != NULL)
+ uvm_unmap_detach(dead_entries, 0);
+
+ UVMHIST_LOG(maphist, "<- done", 0,0,0,0);
+ return(result);
+}
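A minimal, hypothetical wrapper around this entry point (the name drop_mapping is made up for illustration; round_page() and the alignment/locking rules come from the comments above):

        static int
        drop_mapping(map, va, len)
                vm_map_t map;
                vaddr_t va;
                vsize_t len;
        {
                /* va must already be page aligned; map must be unlocked */
                return (uvm_unmap(map, va, va + round_page(len)));
        }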
+
+
+/*
+ * uvm_map_reference: add reference to a map
+ *
+ * => map need not be locked (we use ref_lock).
+ */
+
+MAP_INLINE void
+uvm_map_reference(map)
+ vm_map_t map;
+{
+ if (map == NULL) {
+#ifdef DIAGNOSTIC
+ printf("uvm_map_reference: reference to NULL map\n");
+#ifdef DDB
+ Debugger();
+#endif
+#endif
+ return;
+ }
+
+ simple_lock(&map->ref_lock);
+ map->ref_count++;
+ simple_unlock(&map->ref_lock);
+}
+
+/*
+ * uvm_map_deallocate: drop reference to a map
+ *
+ * => caller must not lock map
+ * => we will zap map if ref count goes to zero
+ */
+
+MAP_INLINE void
+uvm_map_deallocate(map)
+ vm_map_t map;
+{
+ int c;
+
+ if (map == NULL) {
+#ifdef DIAGNOSTIC
+ printf("uvm_map_deallocate: reference to NULL map\n");
+#ifdef DDB
+ Debugger();
+#endif
+#endif
+ return;
+ }
+
+ simple_lock(&map->ref_lock);
+ c = --map->ref_count;
+ simple_unlock(&map->ref_lock);
+
+ if (c > 0) {
+ return;
+ }
+
+ /*
+ * all references gone. unmap and free.
+ */
+
+ uvm_unmap(map, map->min_offset, map->max_offset);
+ pmap_destroy(map->pmap);
+
+ FREE(map, M_VMMAP);
+}
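Editorial usage note: uvm_map_reference() and uvm_map_deallocate() pair up as get/put operations on ref_count; whoever takes a reference is expected to balance it with a deallocate, and the final deallocate both unmaps the whole [min_offset, max_offset) range and frees the vm_map itself.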
+
+#endif /* defined(UVM_MAP_INLINE) || defined(UVM_MAP) */
+
+#endif /* _UVM_UVM_MAP_I_H_ */
diff --git a/sys/uvm/uvm_meter.c b/sys/uvm/uvm_meter.c
new file mode 100644
index 00000000000..e064a087e64
--- /dev/null
+++ b/sys/uvm/uvm_meter.c
@@ -0,0 +1,246 @@
+/* $NetBSD: uvm_meter.c,v 1.7 1998/08/09 22:36:39 perry Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California.
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor,
+ * Washington University, and the University of California, Berkeley
+ * and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_meter.c 8.4 (Berkeley) 1/4/94
+ * from: Id: uvm_meter.c,v 1.1.2.1 1997/08/14 19:10:35 chuck Exp
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+#include <sys/exec.h>
+
+/*
+ * maxslp: ???? XXXCDC
+ */
+
+int maxslp = MAXSLP; /* patchable ... */
+struct loadavg averunnable; /* decl. */
+
+/*
+ * constants for averages over 1, 5, and 15 minutes when sampling at
+ * 5 second intervals.
+ */
+
+static fixpt_t cexp[3] = {
+ 0.9200444146293232 * FSCALE, /* exp(-1/12) */
+ 0.9834714538216174 * FSCALE, /* exp(-1/60) */
+ 0.9944598480048967 * FSCALE, /* exp(-1/180) */
+};
+
+/*
+ * prototypes
+ */
+
+static void uvm_loadav __P((struct loadavg *));
+
+/*
+ * uvm_meter: calculate load average and wake up the swapper (if needed)
+ */
+void
+uvm_meter()
+{
+ if ((time.tv_sec % 5) == 0)
+ uvm_loadav(&averunnable);
+ if (proc0.p_slptime > (maxslp / 2))
+ wakeup((caddr_t)&proc0);
+}
+
+/*
+ * uvm_loadav: compute a tenex style load average of a quantity on
+ * 1, 5, and 15 minute intervals.
+ */
+static void
+uvm_loadav(avg)
+ struct loadavg *avg;
+{
+ int i, nrun;
+ struct proc *p;
+
+ for (nrun = 0, p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
+ switch (p->p_stat) {
+ case SSLEEP:
+ if (p->p_priority > PZERO || p->p_slptime > 1)
+ continue;
+ /* fall through */
+ case SRUN:
+ case SIDL:
+ nrun++;
+ }
+ }
+ for (i = 0; i < 3; i++)
+ avg->ldavg[i] = (cexp[i] * avg->ldavg[i] +
+ nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT;
+}
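For clarity (an editorial sketch, not from the original source): FSCALE is 1 << FSHIFT, so each cexp[] entry is the per-sample decay factor exp(-5/T) in fixed point for T = 60, 300 and 900 seconds, and the update above is the fixed-point form of an exponentially weighted moving average. A userland floating-point equivalent of one update step:

        #include <math.h>

        /* one uvm_loadav()-style update of the 1-minute average */
        static double
        loadav_step(load, nrun)
                double load;
                int nrun;
        {
                double decay = exp(-5.0 / 60.0);

                return (decay * load + (1.0 - decay) * nrun);
        }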
+
+/*
+ * uvm_sysctl: sysctl hook into UVM system.
+ */
+int
+uvm_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
+ int *name;
+ u_int namelen;
+ void *oldp;
+ size_t *oldlenp;
+ void *newp;
+ size_t newlen;
+ struct proc *p;
+{
+ struct vmtotal vmtotals;
+ struct _ps_strings _ps = { PS_STRINGS };
+
+ /* all sysctl names at this level are terminal */
+ if (namelen != 1)
+ return (ENOTDIR); /* overloaded */
+
+ switch (name[0]) {
+ case VM_LOADAVG:
+ return (sysctl_rdstruct(oldp, oldlenp, newp, &averunnable,
+ sizeof(averunnable)));
+
+ case VM_METER:
+ uvm_total(&vmtotals);
+ return (sysctl_rdstruct(oldp, oldlenp, newp, &vmtotals,
+ sizeof(vmtotals)));
+
+ case VM_UVMEXP:
+ return (sysctl_rdstruct(oldp, oldlenp, newp, &uvmexp,
+ sizeof(uvmexp)));
+
+ case VM_PSSTRINGS:
+ return (sysctl_rdstruct(oldp, oldlenp, newp, &_ps,
+ sizeof(_ps)));
+
+ default:
+ return (EOPNOTSUPP);
+ }
+ /* NOTREACHED */
+}
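A hedged userland sketch of how this hook is reached (CTL_VM and VM_LOADAVG are the standard mib names from <sys/sysctl.h>; error handling kept minimal):

        #include <sys/param.h>
        #include <sys/sysctl.h>
        #include <err.h>
        #include <stdio.h>

        int
        main(void)
        {
                struct loadavg la;
                size_t len = sizeof(la);
                int mib[2] = { CTL_VM, VM_LOADAVG };

                if (sysctl(mib, 2, &la, &len, NULL, 0) == -1)
                        err(1, "sysctl");
                printf("1-minute load: %.2f\n",
                    (double)la.ldavg[0] / la.fscale);
                return (0);
        }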
+
+/*
+ * uvm_total: calculate the current state of the system.
+ */
+void
+uvm_total(totalp)
+ struct vmtotal *totalp;
+{
+ struct proc *p;
+#if 0
+ vm_map_entry_t entry;
+ vm_map_t map;
+ int paging;
+#endif
+
+ bzero(totalp, sizeof *totalp);
+
+ /*
+ * calculate process statistics
+ */
+
+ for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
+ if (p->p_flag & P_SYSTEM)
+ continue;
+ switch (p->p_stat) {
+ case 0:
+ continue;
+
+ case SSLEEP:
+ case SSTOP:
+ if (p->p_flag & P_INMEM) {
+ if (p->p_priority <= PZERO)
+ totalp->t_dw++;
+ else if (p->p_slptime < maxslp)
+ totalp->t_sl++;
+ } else if (p->p_slptime < maxslp)
+ totalp->t_sw++;
+ if (p->p_slptime >= maxslp)
+ continue;
+ break;
+
+ case SRUN:
+ case SIDL:
+ if (p->p_flag & P_INMEM)
+ totalp->t_rq++;
+ else
+ totalp->t_sw++;
+ if (p->p_stat == SIDL)
+ continue;
+ break;
+ }
+ /*
+ * note active objects
+ */
+#if 0
+ /*
+ * XXXCDC: BOGUS! rethink this. in the mean time
+ * don't do it.
+ */
+ paging = 0;
+ vm_map_lock(map);
+ for (map = &p->p_vmspace->vm_map, entry = map->header.next;
+ entry != &map->header; entry = entry->next) {
+ if (entry->is_a_map || entry->is_sub_map ||
+ entry->object.uvm_obj == NULL)
+ continue;
+ /* XXX how to do this with uvm */
+ }
+ vm_map_unlock(map);
+ if (paging)
+ totalp->t_pw++;
+#endif
+ }
+ /*
+ * Calculate object memory usage statistics.
+ */
+ totalp->t_free = uvmexp.free;
+ totalp->t_vm = uvmexp.npages - uvmexp.free + uvmexp.swpginuse;
+ totalp->t_avm = uvmexp.active + uvmexp.swpginuse; /* XXX */
+ totalp->t_rm = uvmexp.npages - uvmexp.free;
+ totalp->t_arm = uvmexp.active;
+ totalp->t_vmshr = 0; /* XXX */
+ totalp->t_avmshr = 0; /* XXX */
+ totalp->t_rmshr = 0; /* XXX */
+ totalp->t_armshr = 0; /* XXX */
+}
diff --git a/sys/uvm/uvm_mmap.c b/sys/uvm/uvm_mmap.c
new file mode 100644
index 00000000000..66724213c55
--- /dev/null
+++ b/sys/uvm/uvm_mmap.c
@@ -0,0 +1,963 @@
+/* $NetBSD: uvm_mmap.c,v 1.15 1998/10/11 23:18:20 chuck Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * Copyright (c) 1991, 1993 The Regents of the University of California.
+ * Copyright (c) 1988 University of Utah.
+ *
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the Charles D. Cranor,
+ * Washington University, University of California, Berkeley and
+ * its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
+ * @(#)vm_mmap.c 8.5 (Berkeley) 5/19/94
+ * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
+ */
+
+/*
+ * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
+ * function.
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/resourcevar.h>
+#include <sys/mman.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/vnode.h>
+#include <sys/conf.h>
+#include <sys/stat.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_kern.h>
+
+#include <sys/syscallargs.h>
+
+#include <uvm/uvm.h>
+#include <uvm/uvm_device.h>
+#include <uvm/uvm_vnode.h>
+
+
+/*
+ * unimplemented VM system calls:
+ */
+
+/*
+ * sys_sbrk: sbrk system call.
+ */
+
+/* ARGSUSED */
+int
+sys_sbrk(p, v, retval)
+ struct proc *p;
+ void *v;
+ register_t *retval;
+{
+#if 0
+ struct sys_sbrk_args /* {
+ syscallarg(int) incr;
+ } */ *uap = v;
+#endif
+
+ return (EOPNOTSUPP);
+}
+
+/*
+ * sys_sstk: sstk system call.
+ */
+
+/* ARGSUSED */
+int
+sys_sstk(p, v, retval)
+ struct proc *p;
+ void *v;
+ register_t *retval;
+{
+#if 0
+ struct sys_sstk_args /* {
+ syscallarg(int) incr;
+ } */ *uap = v;
+#endif
+
+ return (EOPNOTSUPP);
+}
+
+/*
+ * sys_madvise: give advice about memory usage.
+ */
+
+/* ARGSUSED */
+int
+sys_madvise(p, v, retval)
+ struct proc *p;
+ void *v;
+ register_t *retval;
+{
+#if 0
+ struct sys_madvise_args /* {
+ syscallarg(caddr_t) addr;
+ syscallarg(size_t) len;
+ syscallarg(int) behav;
+ } */ *uap = v;
+#endif
+
+ return (EOPNOTSUPP);
+}
+
+/*
+ * sys_mincore: determine if pages are in core or not.
+ */
+
+/* ARGSUSED */
+int
+sys_mincore(p, v, retval)
+ struct proc *p;
+ void *v;
+ register_t *retval;
+{
+#if 0
+ struct sys_mincore_args /* {
+ syscallarg(caddr_t) addr;
+ syscallarg(size_t) len;
+ syscallarg(char *) vec;
+ } */ *uap = v;
+#endif
+
+ return (EOPNOTSUPP);
+}
+
+#if 0
+/*
+ * munmapfd: unmap file descriptor
+ *
+ * XXX: is this actually a useful function? could it be useful?
+ */
+
+void
+munmapfd(p, fd)
+ struct proc *p;
+ int fd;
+{
+
+ /*
+ * XXX should vm_deallocate any regions mapped to this file
+ */
+ p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
+}
+#endif
+
+/*
+ * sys_mmap: mmap system call.
+ *
+ * => file offset and address may not be page aligned
+ *    - if MAP_FIXED, offset and address must have the same remainder mod PAGE_SIZE
+ * - if address isn't page aligned the mapping starts at trunc_page(addr)
+ * and the return value is adjusted up by the page offset.
+ */
+
+int
+sys_mmap(p, v, retval)
+ struct proc *p;
+ void *v;
+ register_t *retval;
+{
+ register struct sys_mmap_args /* {
+ syscallarg(caddr_t) addr;
+ syscallarg(size_t) len;
+ syscallarg(int) prot;
+ syscallarg(int) flags;
+ syscallarg(int) fd;
+ syscallarg(long) pad;
+ syscallarg(off_t) pos;
+ } */ *uap = v;
+ vaddr_t addr;
+ struct vattr va;
+ off_t pos;
+ vsize_t size, pageoff;
+ vm_prot_t prot, maxprot;
+ int flags, fd;
+ vaddr_t vm_min_address = VM_MIN_ADDRESS;
+ register struct filedesc *fdp = p->p_fd;
+ register struct file *fp;
+ struct vnode *vp;
+ caddr_t handle;
+ int error;
+
+ /*
+ * first, extract syscall args from the uap.
+ */
+
+ addr = (vaddr_t) SCARG(uap, addr);
+ size = (vsize_t) SCARG(uap, len);
+ prot = SCARG(uap, prot) & VM_PROT_ALL;
+ flags = SCARG(uap, flags);
+ fd = SCARG(uap, fd);
+ pos = SCARG(uap, pos);
+
+ /*
+ * make sure that the newsize fits within a vaddr_t
+ * XXX: need to revise addressing data types
+ */
+ if (pos + size > (vaddr_t)-PAGE_SIZE) {
+#ifdef DEBUG
+ printf("mmap: pos=%qx, size=%x too big\n", pos, (int)size);
+#endif
+ return (EINVAL);
+ }
+
+ /*
+ * align file position and save offset. adjust size.
+ */
+
+ pageoff = (pos & PAGE_MASK);
+ pos -= pageoff;
+ size += pageoff; /* add offset */
+ size = (vsize_t) round_page(size); /* round up */
+ if ((ssize_t) size < 0)
+ return (EINVAL); /* don't allow wrap */
+
+ /*
+ * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
+ */
+
+ if (flags & MAP_FIXED) {
+
+ /* ensure address and file offset are aligned properly */
+ addr -= pageoff;
+ if (addr & PAGE_MASK)
+ return (EINVAL);
+
+ if (VM_MAXUSER_ADDRESS > 0 &&
+ (addr + size) > VM_MAXUSER_ADDRESS)
+ return (EINVAL);
+ if (vm_min_address > 0 && addr < vm_min_address)
+ return (EINVAL);
+ if (addr > addr + size)
+ return (EINVAL); /* no wrapping! */
+
+ } else {
+
+ /*
+ * not fixed: make sure we skip over the largest possible heap.
+ * we will refine our guess later (e.g. to account for VAC, etc)
+ */
+ if (addr < round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
+ addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
+ }
+
+ /*
+ * check for file mappings (i.e. not anonymous) and verify file.
+ */
+
+ if ((flags & MAP_ANON) == 0) {
+
+ if (fd < 0 || fd >= fdp->fd_nfiles)
+ return(EBADF); /* failed range check? */
+ fp = fdp->fd_ofiles[fd]; /* convert to file pointer */
+ if (fp == NULL)
+ return(EBADF);
+
+ if (fp->f_type != DTYPE_VNODE)
+ return (ENODEV); /* only mmap vnodes! */
+ vp = (struct vnode *)fp->f_data; /* convert to vnode */
+
+ if (vp->v_type != VREG && vp->v_type != VCHR &&
+ vp->v_type != VBLK)
+ return (ENODEV); /* only REG/CHR/BLK support mmap */
+
+ /* special case: catch SunOS style /dev/zero */
+ if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
+ flags |= MAP_ANON;
+ goto is_anon;
+ }
+
+ /*
+ * Old programs may not select a specific sharing type, so
+ * default to an appropriate one.
+ *
+ * XXX: how does MAP_ANON fit in the picture?
+ */
+ if ((flags & (MAP_SHARED|MAP_PRIVATE|MAP_COPY)) == 0) {
+#if defined(DEBUG)
+ printf("WARNING: defaulted mmap() share type to "
+ "%s (pid %d comm %s)\n", vp->v_type == VCHR ?
+ "MAP_SHARED" : "MAP_PRIVATE", p->p_pid,
+ p->p_comm);
+#endif
+ if (vp->v_type == VCHR)
+ flags |= MAP_SHARED; /* for a device */
+ else
+ flags |= MAP_PRIVATE; /* for a file */
+ }
+
+ /*
+ * MAP_PRIVATE device mappings don't make sense (and aren't
+ * supported anyway). However, some programs rely on this,
+ * so just change it to MAP_SHARED.
+ */
+ if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0) {
+#if defined(DIAGNOSTIC)
+ printf("WARNING: converted MAP_PRIVATE device mapping "
+ "to MAP_SHARED (pid %d comm %s)\n", p->p_pid,
+ p->p_comm);
+#endif
+ flags = (flags & ~MAP_PRIVATE) | MAP_SHARED;
+ }
+
+ /*
+ * now check protection
+ */
+
+ maxprot = VM_PROT_EXECUTE;
+
+ /* check read access */
+ if (fp->f_flag & FREAD)
+ maxprot |= VM_PROT_READ;
+ else if (prot & PROT_READ)
+ return (EACCES);
+
+ /* check write access, shared case first */
+ if (flags & MAP_SHARED) {
+ /*
+ * if the file is writable, only add PROT_WRITE to
+ * maxprot if the file is not immutable, append-only.
+ * otherwise, if we have asked for PROT_WRITE, return
+ * EPERM.
+ */
+ if (fp->f_flag & FWRITE) {
+ if ((error =
+ VOP_GETATTR(vp, &va, p->p_ucred, p)))
+ return (error);
+ if ((va.va_flags & (IMMUTABLE|APPEND)) == 0)
+ maxprot |= VM_PROT_WRITE;
+ else if (prot & PROT_WRITE)
+ return (EPERM);
+ }
+ else if (prot & PROT_WRITE)
+ return (EACCES);
+ } else {
+ /* MAP_PRIVATE mappings can always be written to */
+ maxprot |= VM_PROT_WRITE;
+ }
+
+ /*
+ * set handle to vnode
+ */
+
+ handle = (caddr_t)vp;
+
+ } else { /* MAP_ANON case */
+
+ if (fd != -1)
+ return (EINVAL);
+
+is_anon: /* label for SunOS style /dev/zero */
+ handle = NULL;
+ maxprot = VM_PROT_ALL;
+ pos = 0;
+ }
+
+ /*
+ * now let kernel internal function uvm_mmap do the work.
+ */
+
+ error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
+ flags, handle, pos);
+
+ if (error == 0)
+ /* remember to add offset */
+ *retval = (register_t)(addr + pageoff);
+
+ return (error);
+}
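A worked example of the offset handling above (editorial, with illustrative numbers and 4 KB pages): a call with len = 100 and pos = 0x1234 becomes pageoff = 0x234, pos = 0x1000 and size = round_page(100 + 0x234) = 0x1000; whatever address uvm_mmap() then picks, the caller gets back that address plus 0x234, so the returned pointer lines up exactly with file offset 0x1234.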
+
+/*
+ * XXX
+ * XXX
+ * XXX
+ */
+int
+sys_omsync(p, v, retval)
+ struct proc *p;
+ void *v;
+ register_t *retval;
+{
+ return EOPNOTSUPP;
+}
+
+/*
+ * sys_msync: the msync system call (a front-end for flush)
+ */
+
+int
+sys_msync(p, v, retval) /* ART_UVM_XXX - is this correct msync? */
+ struct proc *p;
+ void *v;
+ register_t *retval;
+{
+ struct sys_msync_args /* {
+ syscallarg(caddr_t) addr;
+ syscallarg(size_t) len;
+ syscallarg(int) flags;
+ } */ *uap = v;
+ vaddr_t addr;
+ vsize_t size, pageoff;
+ vm_map_t map;
+ int rv, flags, uvmflags;
+
+ /*
+ * extract syscall args from the uap
+ */
+
+ addr = (vaddr_t)SCARG(uap, addr);
+ size = (vsize_t)SCARG(uap, len);
+ flags = SCARG(uap, flags);
+
+ /* sanity check flags */
+ if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
+ (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
+ (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
+ return (EINVAL);
+ if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
+ flags |= MS_SYNC;
+
+ /*
+ * align the address to a page boundary, and adjust the size accordingly
+ */
+
+ pageoff = (addr & PAGE_MASK);
+ addr -= pageoff;
+ size += pageoff;
+ size = (vsize_t) round_page(size);
+
+ /* disallow wrap-around. */
+ if (addr + size < addr)
+ return (EINVAL);
+
+ /*
+ * get map
+ */
+
+ map = &p->p_vmspace->vm_map;
+
+ /*
+ * XXXCDC: do we really need this semantic?
+ *
+ * XXX Gak! If size is zero we are supposed to sync "all modified
+ * pages with the region containing addr". Unfortunately, we
+ * don't really keep track of individual mmaps so we approximate
+ * by flushing the range of the map entry containing addr.
+ * This can be incorrect if the region splits or is coalesced
+ * with a neighbor.
+ */
+ if (size == 0) {
+ vm_map_entry_t entry;
+
+ vm_map_lock_read(map);
+ rv = uvm_map_lookup_entry(map, addr, &entry);
+ if (rv == TRUE) {
+ addr = entry->start;
+ size = entry->end - entry->start;
+ }
+ vm_map_unlock_read(map);
+ if (rv == FALSE)
+ return (EINVAL);
+ }
+
+ /*
+ * translate MS_ flags into PGO_ flags
+ */
+ uvmflags = (flags & MS_INVALIDATE) ? PGO_FREE : 0;
+ if (flags & MS_SYNC)
+ uvmflags |= PGO_SYNCIO;
+ else
+ uvmflags |= PGO_SYNCIO; /* XXXCDC: force sync for now! */
+
+ /*
+ * doit!
+ */
+ rv = uvm_map_clean(map, addr, addr+size, uvmflags);
+
+ /*
+ * and return...
+ */
+ switch (rv) {
+ case KERN_SUCCESS:
+ return(0);
+ case KERN_INVALID_ADDRESS:
+ return (ENOMEM);
+ case KERN_FAILURE:
+ return (EIO);
+ case KERN_PAGES_LOCKED: /* XXXCDC: uvm doesn't return this */
+ return (EBUSY);
+ default:
+ return (EINVAL);
+ }
+ /*NOTREACHED*/
+}
+
+/*
+ * sys_munmap: unmap a user's memory
+ */
+
+int
+sys_munmap(p, v, retval)
+ register struct proc *p;
+ void *v;
+ register_t *retval;
+{
+ register struct sys_munmap_args /* {
+ syscallarg(caddr_t) addr;
+ syscallarg(size_t) len;
+ } */ *uap = v;
+ vaddr_t addr;
+ vsize_t size, pageoff;
+ vm_map_t map;
+ vaddr_t vm_min_address = VM_MIN_ADDRESS;
+ struct vm_map_entry *dead_entries;
+
+ /*
+ * get syscall args...
+ */
+
+ addr = (vaddr_t) SCARG(uap, addr);
+ size = (vsize_t) SCARG(uap, len);
+
+ /*
+ * align the address to a page boundary, and adjust the size accordingly
+ */
+
+ pageoff = (addr & PAGE_MASK);
+ addr -= pageoff;
+ size += pageoff;
+ size = (vsize_t) round_page(size);
+
+ if ((int)size < 0)
+ return (EINVAL);
+ if (size == 0)
+ return (0);
+
+ /*
+ * Check for illegal addresses. Watch out for address wrap...
+ * Note that VM_*_ADDRESS are not constants due to casts (argh).
+ */
+ if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
+ return (EINVAL);
+ if (vm_min_address > 0 && addr < vm_min_address)
+ return (EINVAL);
+ if (addr > addr + size)
+ return (EINVAL);
+ map = &p->p_vmspace->vm_map;
+
+
+ vm_map_lock(map); /* lock map so we can checkprot */
+
+ /*
+ * interesting system call semantic: make sure entire range is
+ * allocated before allowing an unmap.
+ */
+
+ if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
+ vm_map_unlock(map);
+ return (EINVAL);
+ }
+
+ /*
+ * doit!
+ */
+ (void) uvm_unmap_remove(map, addr, addr + size, &dead_entries);
+
+ vm_map_unlock(map); /* and unlock */
+
+ if (dead_entries != NULL)
+ uvm_unmap_detach(dead_entries, 0);
+
+ return (0);
+}
+
+/*
+ * sys_mprotect: the mprotect system call
+ */
+
+int
+sys_mprotect(p, v, retval)
+ struct proc *p;
+ void *v;
+ register_t *retval;
+{
+ struct sys_mprotect_args /* {
+ syscallarg(caddr_t) addr;
+ syscallarg(int) len;
+ syscallarg(int) prot;
+ } */ *uap = v;
+ vaddr_t addr;
+ vsize_t size, pageoff;
+ vm_prot_t prot;
+ int rv;
+
+ /*
+ * extract syscall args from uap
+ */
+
+ addr = (vaddr_t)SCARG(uap, addr);
+ size = (vsize_t)SCARG(uap, len);
+ prot = SCARG(uap, prot) & VM_PROT_ALL;
+
+ /*
+ * align the address to a page boundary, and adjust the size accordingly
+ */
+ pageoff = (addr & PAGE_MASK);
+ addr -= pageoff;
+ size += pageoff;
+ size = (vsize_t) round_page(size);
+ if ((int)size < 0)
+ return (EINVAL);
+
+ /*
+ * doit
+ */
+
+ rv = uvm_map_protect(&p->p_vmspace->vm_map,
+ addr, addr+size, prot, FALSE);
+
+ if (rv == KERN_SUCCESS)
+ return (0);
+ if (rv == KERN_PROTECTION_FAILURE)
+ return (EACCES);
+ return (EINVAL);
+}
+
+/*
+ * sys_minherit: the minherit system call
+ */
+
+int
+sys_minherit(p, v, retval)
+ struct proc *p;
+ void *v;
+ register_t *retval;
+{
+ struct sys_minherit_args /* {
+ syscallarg(caddr_t) addr;
+ syscallarg(int) len;
+ syscallarg(int) inherit;
+ } */ *uap = v;
+ vaddr_t addr;
+ vsize_t size, pageoff;
+ register vm_inherit_t inherit;
+
+ addr = (vaddr_t)SCARG(uap, addr);
+ size = (vsize_t)SCARG(uap, len);
+ inherit = SCARG(uap, inherit);
+ /*
+ * align the address to a page boundary, and adjust the size accordingly
+ */
+
+ pageoff = (addr & PAGE_MASK);
+ addr -= pageoff;
+ size += pageoff;
+ size = (vsize_t) round_page(size);
+
+ if ((int)size < 0)
+ return (EINVAL);
+
+ switch (uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
+ inherit)) {
+ case KERN_SUCCESS:
+ return (0);
+ case KERN_PROTECTION_FAILURE:
+ return (EACCES);
+ }
+ return (EINVAL);
+}
+
+/*
+ * sys_mlock: memory lock
+ */
+
+int
+sys_mlock(p, v, retval)
+ struct proc *p;
+ void *v;
+ register_t *retval;
+{
+ struct sys_mlock_args /* {
+ syscallarg(const void *) addr;
+ syscallarg(size_t) len;
+ } */ *uap = v;
+ vaddr_t addr;
+ vsize_t size, pageoff;
+ int error;
+
+ /*
+ * extract syscall args from uap
+ */
+ addr = (vaddr_t)SCARG(uap, addr);
+ size = (vsize_t)SCARG(uap, len);
+
+ /*
+ * align the address to a page boundary and adjust the size accordingly
+ */
+ pageoff = (addr & PAGE_MASK);
+ addr -= pageoff;
+ size += pageoff;
+ size = (vsize_t) round_page(size);
+
+ /* disallow wrap-around. */
+ if (addr + (int)size < addr)
+ return (EINVAL);
+
+ if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
+ return (EAGAIN);
+
+#ifdef pmap_wired_count
+ if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
+ p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
+ return (EAGAIN);
+#else
+ if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+ return (error);
+#endif
+
+ error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
+ return (error == KERN_SUCCESS ? 0 : ENOMEM);
+}
+
+/*
+ * sys_munlock: unlock wired pages
+ */
+
+int
+sys_munlock(p, v, retval)
+ struct proc *p;
+ void *v;
+ register_t *retval;
+{
+ struct sys_munlock_args /* {
+ syscallarg(const void *) addr;
+ syscallarg(size_t) len;
+ } */ *uap = v;
+ vaddr_t addr;
+ vsize_t size, pageoff;
+ int error;
+
+ /*
+ * extract syscall args from uap
+ */
+
+ addr = (vaddr_t)SCARG(uap, addr);
+ size = (vsize_t)SCARG(uap, len);
+
+ /*
+ * align the address to a page boundary, and adjust the size accordingly
+ */
+ pageoff = (addr & PAGE_MASK);
+ addr -= pageoff;
+ size += pageoff;
+ size = (vsize_t) round_page(size);
+
+ /* disallow wrap-around. */
+ if (addr + (int)size < addr)
+ return (EINVAL);
+
+#ifndef pmap_wired_count
+ if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
+ return (error);
+#endif
+
+ error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
+ return (error == KERN_SUCCESS ? 0 : ENOMEM);
+}
+
+/*
+ * uvm_mmap: internal version of mmap
+ *
+ * - used by sys_mmap, exec, and sysv shm
+ * - handle is a vnode pointer or NULL for MAP_ANON (XXX: not true,
+ * sysv shm uses "named anonymous memory")
+ * - caller must page-align the file offset
+ */
+
+int
+uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
+ vm_map_t map;
+ vaddr_t *addr;
+ vsize_t size;
+ vm_prot_t prot, maxprot;
+ int flags;
+ caddr_t handle; /* XXX: VNODE? */
+ vaddr_t foff;
+{
+ struct uvm_object *uobj;
+ struct vnode *vp;
+ int retval;
+ int advice = UVM_ADV_NORMAL;
+ uvm_flag_t uvmflag = 0;
+
+ /*
+ * check params
+ */
+
+ if (size == 0)
+ return(0);
+ if (foff & PAGE_MASK)
+ return(EINVAL);
+ if ((prot & maxprot) != prot)
+ return(EINVAL);
+
+ /*
+ * for non-fixed mappings, round off the suggested address.
+ * for fixed mappings, check alignment and zap old mappings.
+ */
+
+ if ((flags & MAP_FIXED) == 0) {
+ *addr = round_page(*addr); /* round */
+ } else {
+
+ if (*addr & PAGE_MASK)
+ return(EINVAL);
+ uvmflag |= UVM_FLAG_FIXED;
+ (void) uvm_unmap(map, *addr, *addr + size); /* zap! */
+ }
+
+ /*
+ * handle anon vs. non-anon mappings. for non-anon mappings attach
+ * to underlying vm object.
+ */
+
+ if (flags & MAP_ANON) {
+
+ foff = UVM_UNKNOWN_OFFSET;
+ uobj = NULL;
+ if ((flags & MAP_SHARED) == 0)
+ /* XXX: defer amap create */
+ uvmflag |= UVM_FLAG_COPYONW;
+ else
+ /* shared: create amap now */
+ uvmflag |= UVM_FLAG_OVERLAY;
+
+ } else {
+
+ vp = (struct vnode *) handle; /* get vnode */
+ if (vp->v_type != VCHR) {
+ uobj = uvn_attach((void *) vp, (flags & MAP_SHARED) ?
+ maxprot : (maxprot & ~VM_PROT_WRITE));
+
+ /*
+ * XXXCDC: hack from old code
+ * don't allow vnodes which have been mapped
+ * shared-writeable to persist [forces them to be
+ * flushed out when last reference goes].
+ * XXXCDC: interesting side effect: avoids a bug.
+ * note that in WRITE [ufs_readwrite.c] we
+ * allocate a buffer, uncache, and then do the write.
+ * the problem with this is that if the uncache causes
+ * VM data to be flushed to the same area of the file
+ * we are writing to... in that case we've got the
+ * buffer locked and our process goes to sleep forever.
+ *
+ * XXXCDC: checking maxprot protects us from the
+ * "persistbug" program but this is not a long term
+ * solution.
+ *
+ * XXXCDC: we don't bother calling uncache with the vp
+ * VOP_LOCKed since we know that we are already
+ * holding a valid reference to the uvn (from the
+ * uvn_attach above), and thus it is impossible for
+ * the uncache to kill the uvn and trigger I/O.
+ */
+ if (flags & MAP_SHARED) {
+ if ((prot & VM_PROT_WRITE) ||
+ (maxprot & VM_PROT_WRITE)) {
+ uvm_vnp_uncache(vp);
+ }
+ }
+
+ } else {
+ uobj = udv_attach((void *) &vp->v_rdev,
+ (flags & MAP_SHARED) ?
+ maxprot : (maxprot & ~VM_PROT_WRITE));
+ advice = UVM_ADV_RANDOM;
+ }
+
+ if (uobj == NULL)
+ return((vp->v_type == VREG) ? ENOMEM : EINVAL);
+
+ if ((flags & MAP_SHARED) == 0)
+ uvmflag |= UVM_FLAG_COPYONW;
+ }
+
+ /*
+ * set up mapping flags
+ */
+
+ uvmflag = UVM_MAPFLAG(prot, maxprot,
+ (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
+ advice, uvmflag);
+
+ /*
+ * do it!
+ */
+
+ retval = uvm_map(map, addr, size, uobj, foff, uvmflag);
+
+ if (retval == KERN_SUCCESS)
+ return(0);
+
+ /*
+ * errors: first detach from the uobj, if any.
+ */
+
+ if (uobj)
+ uobj->pgops->pgo_detach(uobj);
+
+ switch (retval) {
+ case KERN_INVALID_ADDRESS:
+ case KERN_NO_SPACE:
+ return(ENOMEM);
+ case KERN_PROTECTION_FAILURE:
+ return(EACCES);
+ }
+ return(EINVAL);
+}
diff --git a/sys/uvm/uvm_object.h b/sys/uvm/uvm_object.h
new file mode 100644
index 00000000000..10e00d1535a
--- /dev/null
+++ b/sys/uvm/uvm_object.h
@@ -0,0 +1,74 @@
+/* $NetBSD: uvm_object.h,v 1.5 1998/03/09 00:58:58 mrg Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ *
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * from: Id: uvm_object.h,v 1.1.2.2 1998/01/04 22:44:51 chuck Exp
+ */
+
+#ifndef _UVM_UVM_OBJECT_H_
+#define _UVM_UVM_OBJECT_H_
+
+/*
+ * uvm_object.h
+ */
+
+/*
+ * uvm_object: all that is left of mach objects.
+ */
+
+struct uvm_object {
+ simple_lock_data_t vmobjlock; /* lock on memq */
+ struct uvm_pagerops *pgops; /* pager ops */
+ struct pglist memq; /* pages in this object */
+ int uo_npages; /* # of pages in memq */
+ int uo_refs; /* reference count */
+};
+
+/*
+ * UVM_OBJ_KERN is a 'special' uo_refs value which indicates that the
+ * object is a kernel memory object rather than a normal one (kernel
+ * memory objects don't have reference counts -- they never die).
+ *
+ * this value is used to detect kernel object mappings at uvm_unmap()
+ * time.   normally when an object is unmapped its pages eventually become
+ * deactivated and then paged out and/or freed. this is not useful
+ * for kernel objects... when a kernel object is unmapped we always want
+ * to free the resources associated with the mapping. UVM_OBJ_KERN
+ * allows us to decide which type of unmapping we want to do.
+ */
+#define UVM_OBJ_KERN (-2)
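For a concrete use of the sentinel, uvm_object_printit() earlier in this import tests it rather than printing it as a count:

        if (uobj->uo_refs == UVM_OBJ_KERN)
                (*pr)("refs=<SYSTEM>\n");
        else
                (*pr)("refs=%d\n", uobj->uo_refs);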
+
+#endif /* _UVM_UVM_OBJECT_H_ */
diff --git a/sys/uvm/uvm_page.c b/sys/uvm/uvm_page.c
new file mode 100644
index 00000000000..15ad5ce99aa
--- /dev/null
+++ b/sys/uvm/uvm_page.c
@@ -0,0 +1,1122 @@
+/* $NetBSD: uvm_page.c,v 1.15 1998/10/18 23:50:00 chs Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * Copyright (c) 1991, 1993, The Regents of the University of California.
+ *
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor,
+ * Washington University, the University of California, Berkeley and
+ * its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_page.c 8.3 (Berkeley) 3/21/94
+ * from: Id: uvm_page.c,v 1.1.2.18 1998/02/06 05:24:42 chs Exp
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * uvm_page.c: page ops.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_kern.h>
+
+#define UVM_PAGE /* pull in uvm_page.h functions */
+#include <uvm/uvm.h>
+
+/*
+ * global vars... XXXCDC: move to uvm. structure.
+ */
+
+/*
+ * physical memory config is stored in vm_physmem.
+ */
+
+struct vm_physseg vm_physmem[VM_PHYSSEG_MAX]; /* XXXCDC: uvm.physmem */
+int vm_nphysseg = 0; /* XXXCDC: uvm.nphysseg */
+
+/*
+ * local variables
+ */
+
+/*
+ * these variables record the values returned by vm_page_bootstrap,
+ * for debugging purposes. The implementation of uvm_pageboot_alloc
+ * and pmap_startup here also uses them internally.
+ */
+
+static vaddr_t virtual_space_start;
+static vaddr_t virtual_space_end;
+
+/*
+ * we use a hash table with only one bucket during bootup. we will
+ * later rehash (resize) the hash table once malloc() is ready.
+ * we statically allocate the bootstrap bucket below...
+ */
+
+static struct pglist uvm_bootbucket;
+
+/*
+ * local prototypes
+ */
+
+static void uvm_pageinsert __P((struct vm_page *));
+
+
+/*
+ * inline functions
+ */
+
+/*
+ * uvm_pageinsert: insert a page in the object and the hash table
+ *
+ * => caller must lock object
+ * => caller must lock page queues
+ * => call should have already set pg's object and offset pointers
+ * and bumped the version counter
+ */
+
+__inline static void
+uvm_pageinsert(pg)
+ struct vm_page *pg;
+{
+ struct pglist *buck;
+ int s;
+
+#ifdef DIAGNOSTIC
+ if (pg->flags & PG_TABLED)
+ panic("uvm_pageinsert: already inserted");
+#endif
+
+ buck = &uvm.page_hash[uvm_pagehash(pg->uobject,pg->offset)];
+ s = splimp();
+ simple_lock(&uvm.hashlock);
+ TAILQ_INSERT_TAIL(buck, pg, hashq); /* put in hash */
+ simple_unlock(&uvm.hashlock);
+ splx(s);
+
+ TAILQ_INSERT_TAIL(&pg->uobject->memq, pg, listq); /* put in object */
+ pg->flags |= PG_TABLED;
+ pg->uobject->uo_npages++;
+
+}
+
+/*
+ * uvm_page_remove: remove page from object and hash
+ *
+ * => caller must lock object
+ * => caller must lock page queues
+ */
+
+void __inline
+uvm_pageremove(pg)
+ struct vm_page *pg;
+{
+ struct pglist *buck;
+ int s;
+
+#ifdef DIAGNOSTIC
+ if ((pg->flags & (PG_FAULTING)) != 0)
+ panic("uvm_pageremove: page is faulting");
+#endif
+
+ if ((pg->flags & PG_TABLED) == 0)
+ return; /* XXX: log */
+
+ buck = &uvm.page_hash[uvm_pagehash(pg->uobject,pg->offset)];
+ s = splimp();
+ simple_lock(&uvm.hashlock);
+ TAILQ_REMOVE(buck, pg, hashq);
+ simple_unlock(&uvm.hashlock);
+ splx(s);
+
+ /* object should be locked */
+ TAILQ_REMOVE(&pg->uobject->memq, pg, listq);
+
+ pg->flags &= ~PG_TABLED;
+ pg->uobject->uo_npages--;
+ pg->uobject = NULL;
+ pg->version++;
+
+}
+
+/*
+ * uvm_page_init: init the page system. called from uvm_init().
+ *
+ * => we return the range of kernel virtual memory in kvm_startp/kvm_endp
+ */
+
+void
+uvm_page_init(kvm_startp, kvm_endp)
+ vaddr_t *kvm_startp, *kvm_endp;
+{
+ int freepages, pagecount;
+ vm_page_t pagearray;
+ int lcv, n, i;
+ paddr_t paddr;
+
+
+ /*
+ * step 1: init the page queues and page queue locks
+ */
+ for (lcv = 0; lcv < VM_NFREELIST; lcv++)
+ TAILQ_INIT(&uvm.page_free[lcv]);
+ TAILQ_INIT(&uvm.page_active);
+ TAILQ_INIT(&uvm.page_inactive_swp);
+ TAILQ_INIT(&uvm.page_inactive_obj);
+ simple_lock_init(&uvm.pageqlock);
+ simple_lock_init(&uvm.fpageqlock);
+
+ /*
+ * step 2: init the <obj,offset> => <page> hash table. for now
+ * we just have one bucket (the bootstrap bucket). later on we
+ * will malloc() new buckets as we dynamically resize the hash table.
+ */
+
+ uvm.page_nhash = 1; /* 1 bucket */
+ uvm.page_hashmask = 0; /* mask for hash function */
+ uvm.page_hash = &uvm_bootbucket; /* install bootstrap bucket */
+ TAILQ_INIT(uvm.page_hash); /* init hash table */
+ simple_lock_init(&uvm.hashlock); /* init hash table lock */
+
+ /*
+ * step 3: allocate vm_page structures.
+ */
+
+ /*
+ * sanity check:
+ * before calling this function the MD code is expected to register
+ * some free RAM with the uvm_page_physload() function. our job
+ * now is to allocate vm_page structures for this memory.
+ */
+
+ if (vm_nphysseg == 0)
+		panic("uvm_page_init: no memory pre-allocated");
+
+ /*
+ * first calculate the number of free pages...
+ *
+ * note that we use start/end rather than avail_start/avail_end.
+ * this allows us to allocate extra vm_page structures in case we
+ * want to return some memory to the pool after booting.
+ */
+
+ freepages = 0;
+ for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
+ freepages += (vm_physmem[lcv].end - vm_physmem[lcv].start);
+
+ /*
+ * we now know we have (PAGE_SIZE * freepages) bytes of memory we can
+ * use. for each page of memory we use we need a vm_page structure.
+ * thus, the total number of pages we can use is the total size of
+ * the memory divided by the PAGE_SIZE plus the size of the vm_page
+ * structure. we add one to freepages as a fudge factor to avoid
+ * truncation errors (since we can only allocate in terms of whole
+ * pages).
+ */
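+	/*
+	 * worked example (illustrative figures only, not taken from this
+	 * code): with PAGE_SIZE == 4096, an assumed sizeof(struct vm_page)
+	 * of 64, and freepages == 1000, the formula below gives
+	 *
+	 *	pagecount = (1001 * 4096) / (4096 + 64) = 985
+	 *
+	 * i.e. 985 usable pages plus their 985 vm_page structures fit in
+	 * the ~1000 pages of free memory, with the extra "fudge" page
+	 * absorbing the rounding.
+	 */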
+
+ pagecount = ((freepages + 1) << PAGE_SHIFT) /
+ (PAGE_SIZE + sizeof(struct vm_page));
+ pagearray = (vm_page_t)uvm_pageboot_alloc(pagecount *
+ sizeof(struct vm_page));
+ bzero(pagearray, pagecount * sizeof(struct vm_page));
+
+ /*
+ * step 4: init the vm_page structures and put them in the correct
+ * place...
+ */
+
+ for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
+
+ n = vm_physmem[lcv].end - vm_physmem[lcv].start;
+ if (n > pagecount) {
+ printf("uvm_page_init: lost %d page(s) in init\n",
+ n - pagecount);
+ panic("uvm_page_init"); /* XXXCDC: shouldn't happen? */
+ /* n = pagecount; */
+ }
+ /* set up page array pointers */
+ vm_physmem[lcv].pgs = pagearray;
+ pagearray += n;
+ pagecount -= n;
+ vm_physmem[lcv].lastpg = vm_physmem[lcv].pgs + (n - 1);
+
+ /* init and free vm_pages (we've already zeroed them) */
+ paddr = ptoa(vm_physmem[lcv].start);
+ for (i = 0 ; i < n ; i++, paddr += PAGE_SIZE) {
+ vm_physmem[lcv].pgs[i].phys_addr = paddr;
+ if (atop(paddr) >= vm_physmem[lcv].avail_start &&
+ atop(paddr) <= vm_physmem[lcv].avail_end) {
+ uvmexp.npages++;
+ /* add page to free pool */
+ uvm_pagefree(&vm_physmem[lcv].pgs[i]);
+ }
+ }
+ }
+ /*
+ * step 5: pass up the values of virtual_space_start and
+ * virtual_space_end (obtained by uvm_pageboot_alloc) to the upper
+ * layers of the VM.
+ */
+
+ *kvm_startp = round_page(virtual_space_start);
+ *kvm_endp = trunc_page(virtual_space_end);
+
+ /*
+ * step 6: init pagedaemon lock
+ */
+
+ simple_lock_init(&uvm.pagedaemon_lock);
+
+ /*
+ * step 7: init reserve thresholds
+ * XXXCDC - values may need adjusting
+ */
+ uvmexp.reserve_pagedaemon = 1;
+ uvmexp.reserve_kernel = 5;
+
+ /*
+ * done!
+ */
+
+}
+
+/*
+ * uvm_setpagesize: set the page size
+ *
+ * => sets page_shift and page_mask from uvmexp.pagesize.
+ * => XXXCDC: move global vars.
+ */
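+/*
+ * example (illustrative only): with uvmexp.pagesize == 4096 this routine
+ * yields uvmexp.pagemask == 0xfff and uvmexp.pageshift == 12, since
+ * 4096 == 1 << 12.
+ */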
+
+void
+uvm_setpagesize()
+{
+ if (uvmexp.pagesize == 0)
+ uvmexp.pagesize = DEFAULT_PAGE_SIZE;
+ uvmexp.pagemask = uvmexp.pagesize - 1;
+ if ((uvmexp.pagemask & uvmexp.pagesize) != 0)
+ panic("uvm_setpagesize: page size not a power of two");
+ for (uvmexp.pageshift = 0; ; uvmexp.pageshift++)
+ if ((1 << uvmexp.pageshift) == uvmexp.pagesize)
+ break;
+}
+
+/*
+ * uvm_pageboot_alloc: steal memory from physmem for bootstrapping
+ */
+
+vaddr_t
+uvm_pageboot_alloc(size)
+ vsize_t size;
+{
+#if defined(PMAP_STEAL_MEMORY)
+ vaddr_t addr;
+
+ /*
+ * defer bootstrap allocation to MD code (it may want to allocate
+ * from a direct-mapped segment). pmap_steal_memory should round
+ * off virtual_space_start/virtual_space_end.
+ */
+
+ addr = pmap_steal_memory(size, &virtual_space_start,
+ &virtual_space_end);
+
+ return(addr);
+
+#else /* !PMAP_STEAL_MEMORY */
+
+ vaddr_t addr, vaddr;
+ paddr_t paddr;
+
+ /* round to page size */
+ size = round_page(size);
+
+ /*
+ * on first call to this function init ourselves. we detect this
+ * by checking virtual_space_start/end which are in the zero'd BSS area.
+ */
+
+ if (virtual_space_start == virtual_space_end) {
+ pmap_virtual_space(&virtual_space_start, &virtual_space_end);
+
+ /* round it the way we like it */
+ virtual_space_start = round_page(virtual_space_start);
+ virtual_space_end = trunc_page(virtual_space_end);
+ }
+
+ /*
+ * allocate virtual memory for this request
+ */
+
+ addr = virtual_space_start;
+ virtual_space_start += size;
+
+ /*
+ * allocate and mapin physical pages to back new virtual pages
+ */
+
+ for (vaddr = round_page(addr) ; vaddr < addr + size ;
+ vaddr += PAGE_SIZE) {
+
+ if (!uvm_page_physget(&paddr))
+ panic("uvm_pageboot_alloc: out of memory");
+
+ /* XXX: should be wired, but some pmaps don't like that ... */
+#if defined(PMAP_NEW)
+ pmap_kenter_pa(vaddr, paddr, VM_PROT_READ|VM_PROT_WRITE);
+#else
+ pmap_enter(pmap_kernel(), vaddr, paddr,
+ VM_PROT_READ|VM_PROT_WRITE, FALSE);
+#endif
+
+ }
+ return(addr);
+#endif /* PMAP_STEAL_MEMORY */
+}
+
+#if !defined(PMAP_STEAL_MEMORY)
+/*
+ * uvm_page_physget: "steal" one page from the vm_physmem structure.
+ *
+ * => attempt to allocate it off the end of a segment in which the "avail"
+ * values match the start/end values. if we can't do that, then we
+ * will advance both values (making them equal, and removing some
+ * vm_page structures from the non-avail area).
+ * => return false if out of memory.
+ */
+
+boolean_t
+uvm_page_physget(paddrp)
+ paddr_t *paddrp;
+{
+ int lcv, x;
+
+ /* pass 1: try allocating from a matching end */
+#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
+ for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
+#else
+ for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
+#endif
+ {
+
+ if (vm_physmem[lcv].pgs)
+ panic("vm_page_physget: called _after_ bootstrap");
+
+ /* try from front */
+ if (vm_physmem[lcv].avail_start == vm_physmem[lcv].start &&
+ vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
+ *paddrp = ptoa(vm_physmem[lcv].avail_start);
+ vm_physmem[lcv].avail_start++;
+ vm_physmem[lcv].start++;
+ /* nothing left? nuke it */
+ if (vm_physmem[lcv].avail_start ==
+ vm_physmem[lcv].end) {
+ if (vm_nphysseg == 1)
+ panic("vm_page_physget: out of memory!");
+ vm_nphysseg--;
+ for (x = lcv ; x < vm_nphysseg ; x++)
+ /* structure copy */
+ vm_physmem[x] = vm_physmem[x+1];
+ }
+ return (TRUE);
+ }
+
+ /* try from rear */
+ if (vm_physmem[lcv].avail_end == vm_physmem[lcv].end &&
+ vm_physmem[lcv].avail_start < vm_physmem[lcv].avail_end) {
+ *paddrp = ptoa(vm_physmem[lcv].avail_end - 1);
+ vm_physmem[lcv].avail_end--;
+ vm_physmem[lcv].end--;
+ /* nothing left? nuke it */
+ if (vm_physmem[lcv].avail_end ==
+ vm_physmem[lcv].start) {
+ if (vm_nphysseg == 1)
+ panic("vm_page_physget: out of memory!");
+ vm_nphysseg--;
+ for (x = lcv ; x < vm_nphysseg ; x++)
+ /* structure copy */
+ vm_physmem[x] = vm_physmem[x+1];
+ }
+ return (TRUE);
+ }
+ }
+
+	/* pass 2: forget about matching ends, just allocate something */
+#if (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
+ for (lcv = vm_nphysseg - 1 ; lcv >= 0 ; lcv--)
+#else
+ for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
+#endif
+ {
+
+ /* any room in this bank? */
+ if (vm_physmem[lcv].avail_start >= vm_physmem[lcv].avail_end)
+ continue; /* nope */
+
+ *paddrp = ptoa(vm_physmem[lcv].avail_start);
+ vm_physmem[lcv].avail_start++;
+ /* truncate! */
+ vm_physmem[lcv].start = vm_physmem[lcv].avail_start;
+
+ /* nothing left? nuke it */
+ if (vm_physmem[lcv].avail_start == vm_physmem[lcv].end) {
+ if (vm_nphysseg == 1)
+ panic("vm_page_physget: out of memory!");
+ vm_nphysseg--;
+ for (x = lcv ; x < vm_nphysseg ; x++)
+ /* structure copy */
+ vm_physmem[x] = vm_physmem[x+1];
+ }
+ return (TRUE);
+ }
+
+ return (FALSE); /* whoops! */
+}
+#endif /* PMAP_STEAL_MEMORY */
+
+/*
+ * uvm_page_physload: load physical memory into VM system
+ *
+ * => all args are page frame numbers (PFNs)
+ * => all pages in start/end get vm_page structures
+ * => areas marked by avail_start/avail_end get added to the free page pool
+ * => we are limited to VM_PHYSSEG_MAX physical memory segments
+ */
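+/*
+ * illustrative call (hypothetical MD code, shown for reference only):
+ * a port with a single RAM bank from first_pa to last_pa, whose front
+ * is occupied by the kernel up to avail_pa, might register it with
+ *
+ *	uvm_page_physload(atop(first_pa), atop(last_pa),
+ *	    atop(avail_pa), atop(last_pa), VM_FREELIST_DEFAULT);
+ */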
+
+void
+uvm_page_physload(start, end, avail_start, avail_end, free_list)
+ vaddr_t start, end, avail_start, avail_end;
+ int free_list;
+{
+ int preload, lcv;
+ psize_t npages;
+ struct vm_page *pgs;
+ struct vm_physseg *ps;
+
+ if (uvmexp.pagesize == 0)
+ panic("vm_page_physload: page size not set!");
+
+ if (free_list >= VM_NFREELIST || free_list < VM_FREELIST_DEFAULT)
+ panic("uvm_page_physload: bad free list %d\n", free_list);
+
+ /*
+ * do we have room?
+ */
+ if (vm_nphysseg == VM_PHYSSEG_MAX) {
+ printf("vm_page_physload: unable to load physical memory "
+ "segment\n");
+ printf("\t%d segments allocated, ignoring 0x%lx -> 0x%lx\n",
+ VM_PHYSSEG_MAX, start, end);
+ return;
+ }
+
+ /*
+ * check to see if this is a "preload" (i.e. uvm_mem_init hasn't been
+ * called yet, so malloc is not available).
+ */
+ for (lcv = 0 ; lcv < vm_nphysseg ; lcv++) {
+ if (vm_physmem[lcv].pgs)
+ break;
+ }
+ preload = (lcv == vm_nphysseg);
+
+ /*
+ * if VM is already running, attempt to malloc() vm_page structures
+ */
+ if (!preload) {
+#if defined(VM_PHYSSEG_NOADD)
+ panic("vm_page_physload: tried to add RAM after vm_mem_init");
+#else
+ /* XXXCDC: need some sort of lockout for this case */
+ paddr_t paddr;
+ npages = end - start; /* # of pages */
+ MALLOC(pgs, struct vm_page *, sizeof(struct vm_page) * npages,
+ M_VMPAGE, M_NOWAIT);
+ if (pgs == NULL) {
+ printf("vm_page_physload: can not malloc vm_page "
+ "structs for segment\n");
+ printf("\tignoring 0x%lx -> 0x%lx\n", start, end);
+ return;
+ }
+ /* zero data, init phys_addr and free_list, and free pages */
+ bzero(pgs, sizeof(struct vm_page) * npages);
+ for (lcv = 0, paddr = ptoa(start) ;
+ lcv < npages ; lcv++, paddr += PAGE_SIZE) {
+ pgs[lcv].phys_addr = paddr;
+ pgs[lcv].free_list = free_list;
+ if (atop(paddr) >= avail_start &&
+ atop(paddr) <= avail_end)
+ uvm_pagefree(&pgs[lcv]);
+ }
+ /* XXXCDC: incomplete: need to update uvmexp.free, what else? */
+ /* XXXCDC: need hook to tell pmap to rebuild pv_list, etc... */
+#endif
+ } else {
+
+ /* gcc complains if these don't get init'd */
+ pgs = NULL;
+ npages = 0;
+
+ }
+
+ /*
+ * now insert us in the proper place in vm_physmem[]
+ */
+
+#if (VM_PHYSSEG_STRAT == VM_PSTRAT_RANDOM)
+
+ /* random: put it at the end (easy!) */
+ ps = &vm_physmem[vm_nphysseg];
+
+#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BSEARCH)
+
+ {
+ int x;
+ /* sort by address for binary search */
+ for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
+ if (start < vm_physmem[lcv].start)
+ break;
+ ps = &vm_physmem[lcv];
+ /* move back other entries, if necessary ... */
+ for (x = vm_nphysseg ; x > lcv ; x--)
+ /* structure copy */
+ vm_physmem[x] = vm_physmem[x - 1];
+ }
+
+#elif (VM_PHYSSEG_STRAT == VM_PSTRAT_BIGFIRST)
+
+ {
+ int x;
+ /* sort by largest segment first */
+ for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
+ if ((end - start) >
+ (vm_physmem[lcv].end - vm_physmem[lcv].start))
+ break;
+ ps = &vm_physmem[lcv];
+ /* move back other entries, if necessary ... */
+ for (x = vm_nphysseg ; x > lcv ; x--)
+ /* structure copy */
+ vm_physmem[x] = vm_physmem[x - 1];
+ }
+
+#else
+
+ panic("vm_page_physload: unknown physseg strategy selected!");
+
+#endif
+
+ ps->start = start;
+ ps->end = end;
+ ps->avail_start = avail_start;
+ ps->avail_end = avail_end;
+ if (preload) {
+ ps->pgs = NULL;
+ } else {
+ ps->pgs = pgs;
+ ps->lastpg = pgs + npages - 1;
+ }
+ ps->free_list = free_list;
+ vm_nphysseg++;
+
+ /*
+ * done!
+ */
+
+ if (!preload)
+ uvm_page_rehash();
+
+ return;
+}
+
+/*
+ * uvm_page_rehash: reallocate hash table based on number of free pages.
+ */
+
+void
+uvm_page_rehash()
+{
+ int freepages, lcv, bucketcount, s, oldcount;
+ struct pglist *newbuckets, *oldbuckets;
+ struct vm_page *pg;
+
+ /*
+ * compute number of pages that can go in the free pool
+ */
+
+ freepages = 0;
+ for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
+ freepages +=
+ (vm_physmem[lcv].avail_end - vm_physmem[lcv].avail_start);
+
+ /*
+ * compute number of buckets needed for this number of pages
+ */
+
+ bucketcount = 1;
+ while (bucketcount < freepages)
+ bucketcount = bucketcount * 2;
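+	/*
+	 * example (illustrative only): with freepages == 3000 the loop
+	 * above stops at bucketcount == 4096, so the hash mask installed
+	 * below becomes 4095 (0xfff).
+	 */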
+
+ /*
+ * malloc new buckets
+ */
+
+ MALLOC(newbuckets, struct pglist *, sizeof(struct pglist) * bucketcount,
+ M_VMPBUCKET, M_NOWAIT);
+ if (newbuckets == NULL) {
+		printf("uvm_page_rehash: WARNING: could not grow page "
+		    "hash table\n");
+ return;
+ }
+ for (lcv = 0 ; lcv < bucketcount ; lcv++)
+ TAILQ_INIT(&newbuckets[lcv]);
+
+ /*
+ * now replace the old buckets with the new ones and rehash everything
+ */
+
+ s = splimp();
+ simple_lock(&uvm.hashlock);
+ /* swap old for new ... */
+ oldbuckets = uvm.page_hash;
+ oldcount = uvm.page_nhash;
+ uvm.page_hash = newbuckets;
+ uvm.page_nhash = bucketcount;
+ uvm.page_hashmask = bucketcount - 1; /* power of 2 */
+
+ /* ... and rehash */
+ for (lcv = 0 ; lcv < oldcount ; lcv++) {
+ while ((pg = oldbuckets[lcv].tqh_first) != NULL) {
+ TAILQ_REMOVE(&oldbuckets[lcv], pg, hashq);
+ TAILQ_INSERT_TAIL(
+ &uvm.page_hash[uvm_pagehash(pg->uobject, pg->offset)],
+ pg, hashq);
+ }
+ }
+ simple_unlock(&uvm.hashlock);
+ splx(s);
+
+ /*
+ * free old bucket array if we malloc'd it previously
+ */
+
+ if (oldbuckets != &uvm_bootbucket)
+ FREE(oldbuckets, M_VMPBUCKET);
+
+ /*
+ * done
+ */
+ return;
+}
+
+
+#if 1 /* XXXCDC: TMP TMP TMP DEBUG DEBUG DEBUG */
+
+void uvm_page_physdump __P((void)); /* SHUT UP GCC */
+
+/* call from DDB */
+void
+uvm_page_physdump()
+{
+ int lcv;
+
+	printf("uvm_page_physdump: physical memory config [segs=%d of %d]:\n",
+ vm_nphysseg, VM_PHYSSEG_MAX);
+ for (lcv = 0 ; lcv < vm_nphysseg ; lcv++)
+ printf("0x%lx->0x%lx [0x%lx->0x%lx]\n", vm_physmem[lcv].start,
+ vm_physmem[lcv].end, vm_physmem[lcv].avail_start,
+ vm_physmem[lcv].avail_end);
+ printf("STRATEGY = ");
+ switch (VM_PHYSSEG_STRAT) {
+ case VM_PSTRAT_RANDOM: printf("RANDOM\n"); break;
+ case VM_PSTRAT_BSEARCH: printf("BSEARCH\n"); break;
+ case VM_PSTRAT_BIGFIRST: printf("BIGFIRST\n"); break;
+ default: printf("<<UNKNOWN>>!!!!\n");
+ }
+ printf("number of buckets = %d\n", uvm.page_nhash);
+}
+#endif
+
+/*
+ * uvm_pagealloc_strat: allocate vm_page from a particular free list.
+ *
+ * => return null if no pages free
+ * => wake up pagedaemon if number of free pages drops below low water mark
+ * => if obj != NULL, obj must be locked (to put in hash)
+ * => if anon != NULL, anon must be locked (to put in anon)
+ * => only one of obj or anon can be non-null
+ * => caller must activate/deactivate page if it is not wired.
+ * => free_list is ignored if strat == UVM_PGA_STRAT_NORMAL.
+ */
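+/*
+ * illustrative call (hypothetical caller, not part of this file):
+ * allocate a page that is attached to neither an object nor an anon,
+ * preferring the default free list but falling back to any other:
+ *
+ *	pg = uvm_pagealloc_strat(NULL, 0, NULL, UVM_PGA_STRAT_FALLBACK,
+ *	    VM_FREELIST_DEFAULT);
+ *	if (pg == NULL)
+ *		... handle the out-of-memory case ...
+ */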
+
+struct vm_page *
+uvm_pagealloc_strat(obj, off, anon, strat, free_list)
+ struct uvm_object *obj;
+ vaddr_t off;
+ struct vm_anon *anon;
+ int strat, free_list;
+{
+ int lcv, s;
+ struct vm_page *pg;
+ struct pglist *freeq;
+
+#ifdef DIAGNOSTIC
+ /* sanity check */
+ if (obj && anon)
+ panic("uvm_pagealloc: obj and anon != NULL");
+#endif
+
+ s = splimp();
+
+ uvm_lock_fpageq(); /* lock free page queue */
+
+	/*
+	 * check to see if we need to generate some free pages by waking
+	 * the pagedaemon.
+	 */
+
+ if (uvmexp.free < uvmexp.freemin || (uvmexp.free < uvmexp.freetarg &&
+ uvmexp.inactive < uvmexp.inactarg))
+ thread_wakeup(&uvm.pagedaemon);
+
+ /*
+ * fail if any of these conditions is true:
+ * [1] there really are no free pages, or
+ * [2] only kernel "reserved" pages remain and
+ * the page isn't being allocated to a kernel object.
+ * [3] only pagedaemon "reserved" pages remain and
+ * the requestor isn't the pagedaemon.
+ */
+
+ if ((uvmexp.free <= uvmexp.reserve_kernel &&
+ !(obj && obj->uo_refs == UVM_OBJ_KERN)) ||
+ (uvmexp.free <= uvmexp.reserve_pagedaemon &&
+ !(obj == uvmexp.kmem_object && curproc == uvm.pagedaemon_proc)))
+ goto fail;
+
+ again:
+ switch (strat) {
+ case UVM_PGA_STRAT_NORMAL:
+ /* Check all freelists in descending priority order. */
+ for (lcv = 0; lcv < VM_NFREELIST; lcv++) {
+ freeq = &uvm.page_free[lcv];
+ if ((pg = freeq->tqh_first) != NULL)
+ goto gotit;
+ }
+
+ /* No pages free! */
+ goto fail;
+
+ case UVM_PGA_STRAT_ONLY:
+ case UVM_PGA_STRAT_FALLBACK:
+ /* Attempt to allocate from the specified free list. */
+#ifdef DIAGNOSTIC
+ if (free_list >= VM_NFREELIST || free_list < 0)
+ panic("uvm_pagealloc_strat: bad free list %d",
+ free_list);
+#endif
+ freeq = &uvm.page_free[free_list];
+ if ((pg = freeq->tqh_first) != NULL)
+ goto gotit;
+
+ /* Fall back, if possible. */
+ if (strat == UVM_PGA_STRAT_FALLBACK) {
+ strat = UVM_PGA_STRAT_NORMAL;
+ goto again;
+ }
+
+ /* No pages free! */
+ goto fail;
+
+ default:
+ panic("uvm_pagealloc_strat: bad strat %d", strat);
+ /* NOTREACHED */
+ }
+
+ gotit:
+ TAILQ_REMOVE(freeq, pg, pageq);
+ uvmexp.free--;
+
+ uvm_unlock_fpageq(); /* unlock free page queue */
+ splx(s);
+
+ pg->offset = off;
+ pg->uobject = obj;
+ pg->uanon = anon;
+ pg->flags = PG_BUSY|PG_CLEAN|PG_FAKE;
+ pg->version++;
+ pg->wire_count = 0;
+ pg->loan_count = 0;
+ if (anon) {
+ anon->u.an_page = pg;
+ pg->pqflags = PQ_ANON;
+ } else {
+ if (obj)
+ uvm_pageinsert(pg);
+ pg->pqflags = 0;
+ }
+#if defined(UVM_PAGE_TRKOWN)
+ pg->owner_tag = NULL;
+#endif
+ UVM_PAGE_OWN(pg, "new alloc");
+
+ return(pg);
+
+ fail:
+ uvm_unlock_fpageq();
+ splx(s);
+ return (NULL);
+}
+
+/*
+ * uvm_pagerealloc: reallocate a page from one object to another
+ *
+ * => both objects must be locked
+ */
+
+void
+uvm_pagerealloc(pg, newobj, newoff)
+ struct vm_page *pg;
+ struct uvm_object *newobj;
+ vaddr_t newoff;
+{
+ /*
+ * remove it from the old object
+ */
+
+ if (pg->uobject) {
+ uvm_pageremove(pg);
+ }
+
+ /*
+ * put it in the new object
+ */
+
+ if (newobj) {
+ pg->uobject = newobj;
+ pg->offset = newoff;
+ pg->version++;
+ uvm_pageinsert(pg);
+ }
+
+ return;
+}
+
+
+/*
+ * uvm_pagefree: free page
+ *
+ * => erase page's identity (i.e. remove from hash/object)
+ * => put page on free list
+ * => caller must lock owning object (either anon or uvm_object)
+ * => caller must lock page queues
+ * => assumes all valid mappings of pg are gone
+ */
+
+void
+uvm_pagefree(pg)
+	struct vm_page *pg;
+{
+ int s;
+ int saved_loan_count = pg->loan_count;
+
+ /*
+ * if the page was an object page (and thus "TABLED"), remove it
+ * from the object.
+ */
+
+ if (pg->flags & PG_TABLED) {
+
+ /*
+ * if the object page is on loan we are going to drop ownership.
+ * it is possible that an anon will take over as owner for this
+ * page later on. the anon will want a !PG_CLEAN page so that
+ * it knows it needs to allocate swap if it wants to page the
+ * page out.
+ */
+
+ if (saved_loan_count)
+ pg->flags &= ~PG_CLEAN; /* in case an anon takes over */
+
+ uvm_pageremove(pg);
+
+		/*
+		 * if our page was on loan, then we just lost control over it
+		 * (in fact, if it was loaned to an anon, the anon may already
+		 * have taken over ownership of the page and thus changed the
+		 * loan_count [e.g. in uvmfault_anonget()]).  so we just
+		 * return: when the last loan is dropped, the page can be
+		 * freed by whatever is holding that last loan.
+		 */
+ if (saved_loan_count)
+ return;
+
+ } else if (saved_loan_count && (pg->pqflags & PQ_ANON)) {
+
+ /*
+ * if our page is owned by an anon and is loaned out to the
+ * kernel then we just want to drop ownership and return.
+ * the kernel must free the page when all its loans clear ...
+ * note that the kernel can't change the loan status of our
+ * page as long as we are holding PQ lock.
+ */
+ pg->pqflags &= ~PQ_ANON;
+ pg->uanon = NULL;
+ return;
+ }
+
+#ifdef DIAGNOSTIC
+ if (saved_loan_count) {
+ printf("uvm_pagefree: warning: freeing page with a loan "
+ "count of %d\n", saved_loan_count);
+ panic("uvm_pagefree: loan count");
+ }
+#endif
+
+
+ /*
+ * now remove the page from the queues
+ */
+
+ if (pg->pqflags & PQ_ACTIVE) {
+ TAILQ_REMOVE(&uvm.page_active, pg, pageq);
+ pg->pqflags &= ~PQ_ACTIVE;
+ uvmexp.active--;
+ }
+ if (pg->pqflags & PQ_INACTIVE) {
+ if (pg->pqflags & PQ_SWAPBACKED)
+ TAILQ_REMOVE(&uvm.page_inactive_swp, pg, pageq);
+ else
+ TAILQ_REMOVE(&uvm.page_inactive_obj, pg, pageq);
+ pg->pqflags &= ~PQ_INACTIVE;
+ uvmexp.inactive--;
+ }
+
+ /*
+ * if the page was wired, unwire it now.
+ */
+	if (pg->wire_count) {
+ pg->wire_count = 0;
+ uvmexp.wired--;
+ }
+
+ /*
+ * and put on free queue
+ */
+
+ s = splimp();
+ uvm_lock_fpageq();
+ TAILQ_INSERT_TAIL(&uvm.page_free[uvm_page_lookup_freelist(pg)],
+ pg, pageq);
+ pg->pqflags = PQ_FREE;
+#ifdef DEBUG
+ pg->uobject = (void *)0xdeadbeef;
+ pg->offset = 0xdeadbeef;
+ pg->uanon = (void *)0xdeadbeef;
+#endif
+ uvmexp.free++;
+ uvm_unlock_fpageq();
+ splx(s);
+}
+
+#if defined(UVM_PAGE_TRKOWN)
+/*
+ * uvm_page_own: set or release page ownership
+ *
+ * => this is a debugging function that keeps track of who sets PG_BUSY
+ * and where they do it. it can be used to track down problems
+ *	such as a process setting "PG_BUSY" and never releasing it.
+ * => page's object [if any] must be locked
+ * => if "tag" is NULL then we are releasing page ownership
+ */
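+/*
+ * for reference: elsewhere in this import the function is reached via
+ * the UVM_PAGE_OWN() macro, e.g. UVM_PAGE_OWN(pg, "new alloc") when a
+ * page is marked PG_BUSY and UVM_PAGE_OWN(pg, NULL) when the busy bit
+ * is dropped (see uvm_pagealloc_strat and uvm_pager_dropcluster).
+ */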
+void
+uvm_page_own(pg, tag)
+ struct vm_page *pg;
+ char *tag;
+{
+ /* gain ownership? */
+ if (tag) {
+ if (pg->owner_tag) {
+ printf("uvm_page_own: page %p already owned "
+ "by proc %d [%s]\n", pg,
+ pg->owner, pg->owner_tag);
+ panic("uvm_page_own");
+ }
+ pg->owner = (curproc) ? curproc->p_pid : (pid_t) -1;
+ pg->owner_tag = tag;
+ return;
+ }
+
+ /* drop ownership */
+ if (pg->owner_tag == NULL) {
+		printf("uvm_page_own: dropping ownership of a non-owned "
+ "page (%p)\n", pg);
+ panic("uvm_page_own");
+ }
+ pg->owner_tag = NULL;
+ return;
+}
+#endif
diff --git a/sys/uvm/uvm_page.h b/sys/uvm/uvm_page.h
new file mode 100644
index 00000000000..dd40fc5bee1
--- /dev/null
+++ b/sys/uvm/uvm_page.h
@@ -0,0 +1,132 @@
+/* $NetBSD: uvm_page.h,v 1.10 1998/08/13 02:11:02 eeh Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * Copyright (c) 1991, 1993, The Regents of the University of California.
+ *
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor,
+ * Washington University, the University of California, Berkeley and
+ * its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_page.h 7.3 (Berkeley) 4/21/91
+ * from: Id: uvm_page.h,v 1.1.2.6 1998/02/04 02:31:42 chuck Exp
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+#ifndef _UVM_UVM_PAGE_H_
+#define _UVM_UVM_PAGE_H_
+
+/*
+ * uvm_page.h
+ */
+
+/*
+ * macros
+ */
+
+#define uvm_lock_pageq() simple_lock(&uvm.pageqlock)
+#define uvm_unlock_pageq() simple_unlock(&uvm.pageqlock)
+#define uvm_lock_fpageq() simple_lock(&uvm.fpageqlock)
+#define uvm_unlock_fpageq() simple_unlock(&uvm.fpageqlock)
+
+#define uvm_pagehash(obj,off) \
+ (((unsigned long)obj+(unsigned long)atop(off)) & uvm.page_hashmask)
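+/*
+ * illustrative expansion (hypothetical values): for an object at
+ * address 0x1000 and an offset of 0x3000 with 4k pages,
+ * uvm_pagehash(obj, off) selects bucket (0x1000 + 3) & uvm.page_hashmask.
+ */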
+
+/*
+ * handle inline options
+ */
+
+#ifdef UVM_PAGE_INLINE
+#define PAGE_INLINE static __inline
+#else
+#define PAGE_INLINE /* nothing */
+#endif /* UVM_PAGE_INLINE */
+
+/*
+ * prototypes: the following prototypes define the interface to pages
+ */
+
+void uvm_page_init __P((vaddr_t *, vaddr_t *));
+#if defined(UVM_PAGE_TRKOWN)
+void uvm_page_own __P((struct vm_page *, char *));
+#endif
+#if !defined(PMAP_STEAL_MEMORY)
+boolean_t uvm_page_physget __P((paddr_t *));
+#endif
+void uvm_page_rehash __P((void));
+
+PAGE_INLINE void uvm_pageactivate __P((struct vm_page *));
+vaddr_t uvm_pageboot_alloc __P((vsize_t));
+PAGE_INLINE void uvm_pagecopy __P((struct vm_page *, struct vm_page *));
+PAGE_INLINE void uvm_pagedeactivate __P((struct vm_page *));
+void uvm_pagefree __P((struct vm_page *));
+PAGE_INLINE struct vm_page *uvm_pagelookup
+ __P((struct uvm_object *, vaddr_t));
+void uvm_pageremove __P((struct vm_page *));
+/* uvm_pagerename: not needed */
+PAGE_INLINE void uvm_pageunwire __P((struct vm_page *));
+PAGE_INLINE void uvm_pagewait __P((struct vm_page *, int));
+PAGE_INLINE void uvm_pagewake __P((struct vm_page *));
+PAGE_INLINE void uvm_pagewire __P((struct vm_page *));
+PAGE_INLINE void uvm_pagezero __P((struct vm_page *));
+
+PAGE_INLINE int uvm_page_lookup_freelist __P((struct vm_page *));
+
+#endif /* _UVM_UVM_PAGE_H_ */
diff --git a/sys/uvm/uvm_page_i.h b/sys/uvm/uvm_page_i.h
new file mode 100644
index 00000000000..5a5671a3f4e
--- /dev/null
+++ b/sys/uvm/uvm_page_i.h
@@ -0,0 +1,292 @@
+/* $NetBSD: uvm_page_i.h,v 1.8 1998/08/13 02:11:02 eeh Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * Copyright (c) 1991, 1993, The Regents of the University of California.
+ *
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor,
+ * Washington University, the University of California, Berkeley and
+ * its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_page.c 8.3 (Berkeley) 3/21/94
+ * from: Id: uvm_page_i.h,v 1.1.2.7 1998/01/05 00:26:02 chuck Exp
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+#ifndef _UVM_UVM_PAGE_I_H_
+#define _UVM_UVM_PAGE_I_H_
+
+/*
+ * uvm_page_i.h
+ */
+
+/*
+ * inline functions [maybe]
+ */
+
+#if defined(UVM_PAGE_INLINE) || defined(UVM_PAGE)
+
+/*
+ * uvm_pagelookup: look up a page
+ *
+ * => caller should lock object to keep someone from pulling the page
+ * out from under it
+ */
+
+struct vm_page *
+uvm_pagelookup(obj, off)
+ struct uvm_object *obj;
+ vaddr_t off;
+{
+ struct vm_page *pg;
+ struct pglist *buck;
+ int s;
+
+ buck = &uvm.page_hash[uvm_pagehash(obj,off)];
+
+ s = splimp();
+ simple_lock(&uvm.hashlock);
+ for (pg = buck->tqh_first ; pg != NULL ; pg = pg->hashq.tqe_next) {
+ if (pg->uobject == obj && pg->offset == off) {
+ simple_unlock(&uvm.hashlock);
+ splx(s);
+ return(pg);
+ }
+ }
+ simple_unlock(&uvm.hashlock);
+ splx(s);
+ return(NULL);
+}
+
+/*
+ * uvm_pagewire: wire the page, thus removing it from the daemon's grasp
+ *
+ * => caller must lock page queues
+ */
+
+PAGE_INLINE void
+uvm_pagewire(pg)
+ struct vm_page *pg;
+{
+
+ if (pg->wire_count == 0) {
+ if (pg->pqflags & PQ_ACTIVE) {
+ TAILQ_REMOVE(&uvm.page_active, pg, pageq);
+ pg->pqflags &= ~PQ_ACTIVE;
+ uvmexp.active--;
+ }
+ if (pg->pqflags & PQ_INACTIVE) {
+ if (pg->pqflags & PQ_SWAPBACKED)
+ TAILQ_REMOVE(&uvm.page_inactive_swp, pg, pageq);
+ else
+ TAILQ_REMOVE(&uvm.page_inactive_obj, pg, pageq);
+ pg->pqflags &= ~PQ_INACTIVE;
+ uvmexp.inactive--;
+ }
+ uvmexp.wired++;
+ }
+ pg->wire_count++;
+}
+
+/*
+ * uvm_pageunwire: unwire the page.
+ *
+ * => activate if wire count goes to zero.
+ * => caller must lock page queues
+ */
+
+PAGE_INLINE void
+uvm_pageunwire(pg)
+ struct vm_page *pg;
+{
+
+ pg->wire_count--;
+ if (pg->wire_count == 0) {
+ TAILQ_INSERT_TAIL(&uvm.page_active, pg, pageq);
+ uvmexp.active++;
+ pg->pqflags |= PQ_ACTIVE;
+ uvmexp.wired--;
+ }
+}
+
+/*
+ * uvm_pagedeactivate: deactivate page -- no pmaps have access to page
+ *
+ * => caller must lock page queues
+ * => caller must check to make sure page is not wired
+ * => object that page belongs to must be locked (so we can adjust pg->flags)
+ */
+
+PAGE_INLINE void
+uvm_pagedeactivate(pg)
+ struct vm_page *pg;
+{
+ if (pg->pqflags & PQ_ACTIVE) {
+ TAILQ_REMOVE(&uvm.page_active, pg, pageq);
+ pg->pqflags &= ~PQ_ACTIVE;
+ uvmexp.active--;
+ }
+ if ((pg->pqflags & PQ_INACTIVE) == 0) {
+#ifdef DIAGNOSTIC
+ if (pg->wire_count)
+ panic("uvm_pagedeactivate: caller did not check "
+ "wire count");
+#endif
+ if (pg->pqflags & PQ_SWAPBACKED)
+ TAILQ_INSERT_TAIL(&uvm.page_inactive_swp, pg, pageq);
+ else
+ TAILQ_INSERT_TAIL(&uvm.page_inactive_obj, pg, pageq);
+ pg->pqflags |= PQ_INACTIVE;
+ uvmexp.inactive++;
+ pmap_clear_reference(PMAP_PGARG(pg));
+ if (pmap_is_modified(PMAP_PGARG(pg)))
+ pg->flags &= ~PG_CLEAN;
+ }
+}
+
+/*
+ * uvm_pageactivate: activate page
+ *
+ * => caller must lock page queues
+ */
+
+PAGE_INLINE void
+uvm_pageactivate(pg)
+ struct vm_page *pg;
+{
+ if (pg->pqflags & PQ_INACTIVE) {
+ if (pg->pqflags & PQ_SWAPBACKED)
+ TAILQ_REMOVE(&uvm.page_inactive_swp, pg, pageq);
+ else
+ TAILQ_REMOVE(&uvm.page_inactive_obj, pg, pageq);
+ pg->pqflags &= ~PQ_INACTIVE;
+ uvmexp.inactive--;
+ }
+ if (pg->wire_count == 0) {
+
+ /*
+ * if page is already active, remove it from list so we
+ * can put it at tail. if it wasn't active, then mark
+ * it active and bump active count
+ */
+ if (pg->pqflags & PQ_ACTIVE)
+ TAILQ_REMOVE(&uvm.page_active, pg, pageq);
+ else {
+ pg->pqflags |= PQ_ACTIVE;
+ uvmexp.active++;
+ }
+
+ TAILQ_INSERT_TAIL(&uvm.page_active, pg, pageq);
+ }
+}
+
+/*
+ * uvm_pagezero: zero fill a page
+ *
+ * => if page is part of an object then the object should be locked
+ * to protect pg->flags.
+ */
+
+PAGE_INLINE void
+uvm_pagezero(pg)
+ struct vm_page *pg;
+{
+
+ pg->flags &= ~PG_CLEAN;
+ pmap_zero_page(VM_PAGE_TO_PHYS(pg));
+}
+
+/*
+ * uvm_pagecopy: copy a page
+ *
+ * => if page is part of an object then the object should be locked
+ * to protect pg->flags.
+ */
+
+PAGE_INLINE void
+uvm_pagecopy(src, dst)
+ struct vm_page *src, *dst;
+{
+
+ dst->flags &= ~PG_CLEAN;
+ pmap_copy_page(VM_PAGE_TO_PHYS(src), VM_PAGE_TO_PHYS(dst));
+}
+
+/*
+ * uvm_page_lookup_freelist: look up the free list for the specified page
+ */
+
+PAGE_INLINE int
+uvm_page_lookup_freelist(pg)
+ struct vm_page *pg;
+{
+ int lcv;
+
+ lcv = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), NULL);
+#ifdef DIAGNOSTIC
+ if (lcv == -1)
+ panic("uvm_page_lookup_freelist: unable to locate physseg");
+#endif
+ return (vm_physmem[lcv].free_list);
+}
+
+#endif /* defined(UVM_PAGE_INLINE) || defined(UVM_PAGE) */
+
+#endif /* _UVM_UVM_PAGE_I_H_ */
diff --git a/sys/uvm/uvm_pager.c b/sys/uvm/uvm_pager.c
new file mode 100644
index 00000000000..1b8c8a36d3e
--- /dev/null
+++ b/sys/uvm/uvm_pager.c
@@ -0,0 +1,762 @@
+/* $NetBSD: uvm_pager.c,v 1.14 1999/01/22 08:00:35 chs Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ *
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * from: Id: uvm_pager.c,v 1.1.2.23 1998/02/02 20:38:06 chuck Exp
+ */
+
+/*
+ * uvm_pager.c: generic functions used to assist the pagers.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_kern.h>
+
+#define UVM_PAGER
+#include <uvm/uvm.h>
+
+/*
+ * list of uvm pagers in the system
+ */
+
+extern struct uvm_pagerops aobj_pager;
+extern struct uvm_pagerops uvm_deviceops;
+extern struct uvm_pagerops uvm_vnodeops;
+
+struct uvm_pagerops *uvmpagerops[] = {
+ &aobj_pager,
+ &uvm_deviceops,
+ &uvm_vnodeops,
+};
+
+/*
+ * the pager map: provides KVA for I/O
+ */
+
+#define PAGER_MAP_SIZE (4 * 1024 * 1024)
+vm_map_t pager_map; /* XXX */
+simple_lock_data_t pager_map_wanted_lock;
+boolean_t pager_map_wanted; /* locked by pager map */
+
+
+/*
+ * uvm_pager_init: init pagers (at boot time)
+ */
+
+void
+uvm_pager_init()
+{
+ int lcv;
+
+ /*
+ * init pager map
+ */
+
+ pager_map = uvm_km_suballoc(kernel_map, &uvm.pager_sva, &uvm.pager_eva,
+ PAGER_MAP_SIZE, FALSE, FALSE, NULL);
+ simple_lock_init(&pager_map_wanted_lock);
+ pager_map_wanted = FALSE;
+
+ /*
+ * init ASYNC I/O queue
+ */
+
+ TAILQ_INIT(&uvm.aio_done);
+
+ /*
+ * call pager init functions
+ */
+ for (lcv = 0 ; lcv < sizeof(uvmpagerops)/sizeof(struct uvm_pagerops *);
+ lcv++) {
+ if (uvmpagerops[lcv]->pgo_init)
+ uvmpagerops[lcv]->pgo_init();
+ }
+}
+
+/*
+ * uvm_pagermapin: map pages into KVA (pager_map) for I/O that needs mappings
+ *
+ * we basically just map in a blank map entry to reserve the space in the
+ * map and then use pmap_enter() to put the mappings in by hand.
+ */
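+/*
+ * illustrative pairing (hypothetical synchronous caller): a pager would
+ * typically bracket its transfer with
+ *
+ *	kva = uvm_pagermapin(pps, npages, NULL, M_WAITOK);
+ *	... do I/O on [kva, kva + (npages << PAGE_SHIFT)) ...
+ *	uvm_pagermapout(kva, npages);
+ *
+ * passing a non-NULL aiop instead requests an async I/O descriptor.
+ */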
+
+vaddr_t
+uvm_pagermapin(pps, npages, aiop, waitf)
+ struct vm_page **pps;
+ int npages;
+ struct uvm_aiodesc **aiop; /* OUT */
+ int waitf;
+{
+ vsize_t size;
+ vaddr_t kva;
+ struct uvm_aiodesc *aio;
+#if !defined(PMAP_NEW)
+ vaddr_t cva;
+ struct vm_page *pp;
+#endif
+ UVMHIST_FUNC("uvm_pagermapin"); UVMHIST_CALLED(maphist);
+
+ UVMHIST_LOG(maphist,"(pps=0x%x, npages=%d, aiop=0x%x, waitf=%d)",
+ pps, npages, aiop, waitf);
+
+ReStart:
+ if (aiop) {
+ MALLOC(aio, struct uvm_aiodesc *, sizeof(*aio), M_TEMP, waitf);
+ if (aio == NULL)
+ return(0);
+ *aiop = aio;
+ } else {
+ aio = NULL;
+ }
+
+ size = npages << PAGE_SHIFT;
+ kva = NULL; /* let system choose VA */
+
+ if (uvm_map(pager_map, &kva, size, NULL,
+ UVM_UNKNOWN_OFFSET, UVM_FLAG_NOMERGE) != KERN_SUCCESS) {
+ if (waitf == M_NOWAIT) {
+ if (aio)
+ FREE(aio, M_TEMP);
+ UVMHIST_LOG(maphist,"<- NOWAIT failed", 0,0,0,0);
+ return(NULL);
+ }
+ simple_lock(&pager_map_wanted_lock);
+ pager_map_wanted = TRUE;
+ UVMHIST_LOG(maphist, " SLEEPING on pager_map",0,0,0,0);
+ UVM_UNLOCK_AND_WAIT(pager_map, &pager_map_wanted_lock, FALSE,
+ "pager_map",0);
+ goto ReStart;
+ }
+
+#if defined(PMAP_NEW)
+ /*
+ * XXX: (ab)using the pmap module to store state info for us.
+ * (pmap stores the PAs... we fetch them back later and convert back
+ * to pages with PHYS_TO_VM_PAGE).
+ */
+ pmap_kenter_pgs(kva, pps, npages);
+
+#else /* PMAP_NEW */
+
+ /* got it */
+ for (cva = kva ; size != 0 ; size -= PAGE_SIZE, cva += PAGE_SIZE) {
+ pp = *pps++;
+#ifdef DEBUG
+ if ((pp->flags & PG_BUSY) == 0)
+ panic("uvm_pagermapin: page not busy");
+#endif
+
+ pmap_enter(vm_map_pmap(pager_map), cva, VM_PAGE_TO_PHYS(pp),
+ VM_PROT_DEFAULT, TRUE);
+ }
+
+#endif /* PMAP_NEW */
+
+ UVMHIST_LOG(maphist, "<- done (KVA=0x%x)", kva,0,0,0);
+ return(kva);
+}
+
+/*
+ * uvm_pagermapout: remove pager_map mapping
+ *
+ * we remove our mappings by hand and then remove the map entry (waking
+ * up anyone waiting for the space).
+ */
+
+void
+uvm_pagermapout(kva, npages)
+ vaddr_t kva;
+ int npages;
+{
+ vsize_t size = npages << PAGE_SHIFT;
+ vm_map_entry_t entries;
+ UVMHIST_FUNC("uvm_pagermapout"); UVMHIST_CALLED(maphist);
+
+ UVMHIST_LOG(maphist, " (kva=0x%x, npages=%d)", kva, npages,0,0);
+
+ /*
+ * duplicate uvm_unmap, but add in pager_map_wanted handling.
+ */
+
+ vm_map_lock(pager_map);
+ (void) uvm_unmap_remove(pager_map, kva, kva + size, &entries);
+ simple_lock(&pager_map_wanted_lock);
+ if (pager_map_wanted) {
+ pager_map_wanted = FALSE;
+ wakeup(pager_map);
+ }
+ simple_unlock(&pager_map_wanted_lock);
+ vm_map_unlock(pager_map);
+ if (entries)
+ uvm_unmap_detach(entries, 0);
+
+ UVMHIST_LOG(maphist,"<- done",0,0,0,0);
+}
+
+/*
+ * uvm_mk_pcluster
+ *
+ * generic "make 'pager put' cluster" function. a pager can either
+ * [1] set pgo_mk_pcluster to NULL (never cluster), [2] set it to this
+ * generic function, or [3] set it to a pager specific function.
+ *
+ * => caller must lock object _and_ pagequeues (since we need to look
+ * at active vs. inactive bits, etc.)
+ * => caller must make center page busy and write-protect it
+ * => we mark all cluster pages busy for the caller
+ * => the caller must unbusy all pages (and check wanted/released
+ * status if it drops the object lock)
+ * => flags:
+ * PGO_ALLPAGES: all pages in object are valid targets
+ * !PGO_ALLPAGES: use "lo" and "hi" to limit range of cluster
+ * PGO_DOACTCLUST: include active pages in cluster.
+ * NOTE: the caller should clear PG_CLEANCHK bits if PGO_DOACTCLUST.
+ * PG_CLEANCHK is only a hint, but clearing will help reduce
+ * the number of calls we make to the pmap layer.
+ */
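+/*
+ * illustrative call (hypothetical pager, for reference only): with the
+ * center page "pg" already busy and write-protected, and "pps" an array
+ * of "npages" page pointers, a pager put routine might cluster with
+ *
+ *	ppsp = uvm_mk_pcluster(uobj, pps, &npages, pg, PGO_ALLPAGES, 0, 0);
+ *
+ * on return ppsp points into pps at the cluster (all pages busy) and
+ * npages holds the cluster size.
+ */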
+
+struct vm_page **
+uvm_mk_pcluster(uobj, pps, npages, center, flags, mlo, mhi)
+ struct uvm_object *uobj; /* IN */
+ struct vm_page **pps, *center; /* IN/OUT, IN */
+ int *npages, flags; /* IN/OUT, IN */
+ vaddr_t mlo, mhi; /* IN (if !PGO_ALLPAGES) */
+{
+ struct vm_page **ppsp, *pclust;
+ vaddr_t lo, hi, curoff;
+ int center_idx, forward;
+ UVMHIST_FUNC("uvm_mk_pcluster"); UVMHIST_CALLED(maphist);
+
+ /*
+ * center page should already be busy and write protected. XXX:
+ * suppose page is wired? if we lock, then a process could
+ * fault/block on it. if we don't lock, a process could write the
+ * pages in the middle of an I/O. (consider an msync()). let's
+ * lock it for now (better to delay than corrupt data?).
+ */
+
+ /*
+ * get cluster boundaries, check sanity, and apply our limits as well.
+ */
+
+ uobj->pgops->pgo_cluster(uobj, center->offset, &lo, &hi);
+ if ((flags & PGO_ALLPAGES) == 0) {
+ if (lo < mlo)
+ lo = mlo;
+ if (hi > mhi)
+ hi = mhi;
+ }
+ if ((hi - lo) >> PAGE_SHIFT > *npages) { /* pps too small, bail out! */
+#ifdef DIAGNOSTIC
+ printf("uvm_mk_pcluster: provided page array too small (fixed)\n");
+#endif
+ pps[0] = center;
+ *npages = 1;
+ return(pps);
+ }
+
+ /*
+ * now determine the center and attempt to cluster around the
+ * edges
+ */
+
+ center_idx = (center->offset - lo) >> PAGE_SHIFT;
+ pps[center_idx] = center; /* plug in the center page */
+ ppsp = &pps[center_idx];
+ *npages = 1;
+
+ /*
+ * attempt to cluster around the left [backward], and then
+ * the right side [forward].
+ *
+ * note that for inactive pages (pages that have been deactivated)
+ * there are no valid mappings and PG_CLEAN should be up to date.
+ * [i.e. there is no need to query the pmap with pmap_is_modified
+ * since there are no mappings].
+ */
+
+ for (forward = 0 ; forward <= 1 ; forward++) {
+
+ curoff = center->offset + (forward ? PAGE_SIZE : -PAGE_SIZE);
+ for ( ;(forward == 0 && curoff >= lo) ||
+ (forward && curoff < hi);
+ curoff += (forward ? 1 : -1) << PAGE_SHIFT) {
+
+ pclust = uvm_pagelookup(uobj, curoff); /* lookup page */
+ if (pclust == NULL)
+ break; /* no page */
+ /* handle active pages */
+ /* NOTE: inactive pages don't have pmap mappings */
+ if ((pclust->pqflags & PQ_INACTIVE) == 0) {
+ if ((flags & PGO_DOACTCLUST) == 0)
+				/* don't want mapped pages at all */
+ break;
+
+ /* make sure "clean" bit is sync'd */
+ if ((pclust->flags & PG_CLEANCHK) == 0) {
+ if ((pclust->flags & (PG_CLEAN|PG_BUSY))
+ == PG_CLEAN &&
+ pmap_is_modified(PMAP_PGARG(pclust)))
+ pclust->flags &= ~PG_CLEAN;
+ /* now checked */
+ pclust->flags |= PG_CLEANCHK;
+ }
+ }
+ /* is page available for cleaning and does it need it */
+ if ((pclust->flags & (PG_CLEAN|PG_BUSY)) != 0)
+ break; /* page is already clean or is busy */
+
+ /* yes! enroll the page in our array */
+ pclust->flags |= PG_BUSY; /* busy! */
+ UVM_PAGE_OWN(pclust, "uvm_mk_pcluster");
+ /* XXX: protect wired page? see above comment. */
+ pmap_page_protect(PMAP_PGARG(pclust), VM_PROT_READ);
+ if (!forward) {
+ ppsp--; /* back up one page */
+ *ppsp = pclust;
+ } else {
+ /* move forward one page */
+ ppsp[*npages] = pclust;
+ }
+ *npages = *npages + 1;
+ }
+ }
+
+ /*
+ * done! return the cluster array to the caller!!!
+ */
+
+ UVMHIST_LOG(maphist, "<- done",0,0,0,0);
+ return(ppsp);
+}
+
+
+/*
+ * uvm_shareprot: generic share protect routine
+ *
+ * => caller must lock map entry's map
+ * => caller must lock object pointed to by map entry
+ */
+
+void
+uvm_shareprot(entry, prot)
+ vm_map_entry_t entry;
+ vm_prot_t prot;
+{
+ struct uvm_object *uobj = entry->object.uvm_obj;
+ struct vm_page *pp;
+ vaddr_t start, stop;
+ UVMHIST_FUNC("uvm_shareprot"); UVMHIST_CALLED(maphist);
+
+ if (UVM_ET_ISSUBMAP(entry))
+ panic("uvm_shareprot: non-object attached");
+
+ start = entry->offset;
+ stop = start + (entry->end - entry->start);
+
+ /*
+ * traverse list of pages in object. if page in range, pmap_prot it
+ */
+
+ for (pp = uobj->memq.tqh_first ; pp != NULL ; pp = pp->listq.tqe_next) {
+ if (pp->offset >= start && pp->offset < stop)
+ pmap_page_protect(PMAP_PGARG(pp), prot);
+ }
+ UVMHIST_LOG(maphist, "<- done",0,0,0,0);
+}
+
+/*
+ * uvm_pager_put: high level pageout routine
+ *
+ * we want to pageout page "pg" to backing store, clustering if
+ * possible.
+ *
+ * => page queues must be locked by caller
+ * => if page is not swap-backed, then "uobj" points to the object
+ * backing it. this object should be locked by the caller.
+ * => if page is swap-backed, then "uobj" should be NULL.
+ * => "pg" should be PG_BUSY (by caller), and !PG_CLEAN
+ * for swap-backed memory, "pg" can be NULL if there is no page
+ * of interest [sometimes the case for the pagedaemon]
+ * => "ppsp_ptr" should point to an array of npages vm_page pointers
+ * for possible cluster building
+ * => flags (first two for non-swap-backed pages)
+ * PGO_ALLPAGES: all pages in uobj are valid targets
+ * PGO_DOACTCLUST: include "PQ_ACTIVE" pages as valid targets
+ * PGO_SYNCIO: do SYNC I/O (no async)
+ * PGO_PDFREECLUST: pagedaemon: drop cluster on successful I/O
+ * => start/stop: if (uobj && !PGO_ALLPAGES) limit targets to this range
+ * if (!uobj) start is the (daddr_t) of the starting swapblk
+ * => return state:
+ * 1. we return the VM_PAGER status code of the pageout
+ * 2. we return with the page queues unlocked
+ * 3. if (uobj != NULL) [!swap_backed] we return with
+ * uobj locked _only_ if PGO_PDFREECLUST is set
+ * AND result != VM_PAGER_PEND. in all other cases
+ * we return with uobj unlocked. [this is a hack
+ * that allows the pagedaemon to save one lock/unlock
+ * pair in the !swap_backed case since we have to
+ * lock the uobj to drop the cluster anyway]
+ * 4. on errors we always drop the cluster. thus, if we return
+ * !PEND, !OK, then the caller only has to worry about
+ * un-busying the main page (not the cluster pages).
+ * 5. on success, if !PGO_PDFREECLUST, we return the cluster
+ * with all pages busy (caller must un-busy and check
+ * wanted/released flags).
+ */
+
+int
+uvm_pager_put(uobj, pg, ppsp_ptr, npages, flags, start, stop)
+ struct uvm_object *uobj; /* IN */
+ struct vm_page *pg, ***ppsp_ptr;/* IN, IN/OUT */
+ int *npages; /* IN/OUT */
+ int flags; /* IN */
+ vaddr_t start, stop; /* IN, IN */
+{
+ int result;
+ daddr_t swblk;
+ struct vm_page **ppsp = *ppsp_ptr;
+
+ /*
+ * note that uobj is null if we are doing a swap-backed pageout.
+ * note that uobj is !null if we are doing normal object pageout.
+ * note that the page queues must be locked to cluster.
+ */
+
+ if (uobj) { /* if !swap-backed */
+
+ /*
+ * attempt to build a cluster for pageout using its
+ * make-put-cluster function (if it has one).
+ */
+
+ if (uobj->pgops->pgo_mk_pcluster) {
+ ppsp = uobj->pgops->pgo_mk_pcluster(uobj, ppsp,
+ npages, pg, flags, start, stop);
+ *ppsp_ptr = ppsp; /* update caller's pointer */
+ } else {
+ ppsp[0] = pg;
+ *npages = 1;
+ }
+
+ swblk = 0; /* XXX: keep gcc happy */
+
+ } else {
+
+ /*
+ * for swap-backed pageout, the caller (the pagedaemon) has
+ * already built the cluster for us. the starting swap
+ * block we are writing to has been passed in as "start."
+ * "pg" could be NULL if there is no page we are especially
+ * interested in (in which case the whole cluster gets dropped
+ * in the event of an error or a sync "done").
+ */
+ swblk = (daddr_t) start;
+ /* ppsp and npages should be ok */
+ }
+
+ /* now that we've clustered we can unlock the page queues */
+ uvm_unlock_pageq();
+
+ /*
+ * now attempt the I/O. if we have a failure and we are
+ * clustered, we will drop the cluster and try again.
+ */
+
+ReTry:
+ if (uobj) {
+ /* object is locked */
+ result = uobj->pgops->pgo_put(uobj, ppsp, *npages,
+ flags & PGO_SYNCIO);
+ /* object is now unlocked */
+ } else {
+ /* nothing locked */
+ result = uvm_swap_put(swblk, ppsp, *npages, flags & PGO_SYNCIO);
+ /* nothing locked */
+ }
+
+ /*
+ * we have attempted the I/O.
+ *
+ * if the I/O was a success then:
+ * if !PGO_PDFREECLUST, we return the cluster to the
+ * caller (who must un-busy all pages)
+ * else we un-busy cluster pages for the pagedaemon
+ *
+ * if I/O is pending (async i/o) then we return the pending code.
+ * [in this case the async i/o done function must clean up when
+ * i/o is done...]
+ */
+
+ if (result == VM_PAGER_PEND || result == VM_PAGER_OK) {
+ if (result == VM_PAGER_OK && (flags & PGO_PDFREECLUST)) {
+ /*
+ * drop cluster and relock object (only if I/O is
+ * not pending)
+ */
+ if (uobj)
+ /* required for dropcluster */
+ simple_lock(&uobj->vmobjlock);
+ if (*npages > 1 || pg == NULL)
+ uvm_pager_dropcluster(uobj, pg, ppsp, npages,
+ PGO_PDFREECLUST, 0);
+ /* if (uobj): object still locked, as per
+ * return-state item #3 */
+ }
+ return (result);
+ }
+
+ /*
+	 * a pager error occurred.  if we have clustered, we drop the
+ * cluster and try again.
+ */
+
+ if (*npages > 1 || pg == NULL) {
+ if (uobj)
+ simple_lock(&uobj->vmobjlock);
+ uvm_pager_dropcluster(uobj, pg, ppsp, npages, PGO_REALLOCSWAP,
+ swblk);
+ if (pg != NULL)
+ goto ReTry;
+ }
+
+ /*
+	 * a pager error occurred (even after dropping the cluster, if there
+ * was one). give up! the caller only has one page ("pg")
+ * to worry about.
+ */
+
+ if (uobj && (flags & PGO_PDFREECLUST) != 0)
+ simple_lock(&uobj->vmobjlock);
+ return(result);
+}
+
+/*
+ * uvm_pager_dropcluster: drop a cluster we have built (because we
+ * got an error, or, if PGO_PDFREECLUST we are un-busying the
+ * cluster pages on behalf of the pagedaemon).
+ *
+ * => uobj, if non-null, is a non-swap-backed object that is
+ * locked by the caller. we return with this object still
+ * locked.
+ * => page queues are not locked
+ * => pg is our page of interest (the one we clustered around, can be null)
+ * => ppsp/npages is our current cluster
+ * => flags: PGO_PDFREECLUST: pageout was a success: un-busy cluster
+ * pages on behalf of the pagedaemon.
+ * PGO_REALLOCSWAP: drop previously allocated swap slots for
+ * clustered swap-backed pages (except for "pg" if !NULL)
+ * "swblk" is the start of swap alloc (e.g. for ppsp[0])
+ * [only meaningful if swap-backed (uobj == NULL)]
+ */
+
+
+void
+uvm_pager_dropcluster(uobj, pg, ppsp, npages, flags, swblk)
+	struct uvm_object *uobj;	/* IN */
+	struct vm_page *pg, **ppsp;	/* IN, IN/OUT */
+	int *npages;			/* IN/OUT */
+	int flags;
+	int swblk;	/* valid if (uobj == NULL && PGO_REALLOCSWAP) */
+{
+ int lcv;
+ boolean_t obj_is_alive;
+ struct uvm_object *saved_uobj;
+
+ /*
+ * if we need to reallocate swap space for the cluster we are dropping
+ * (true if swap-backed and PGO_REALLOCSWAP) then free the old
+ * allocation now. save a block for "pg" if it is non-NULL.
+ *
+ * note that we will zap the object's pointer to swap in the "for" loop
+ * below...
+ */
+
+ if (uobj == NULL && (flags & PGO_REALLOCSWAP)) {
+ if (pg)
+ uvm_swap_free(swblk + 1, *npages - 1);
+ else
+ uvm_swap_free(swblk, *npages);
+ }
+
+ /*
+ * drop all pages but "pg"
+ */
+
+ for (lcv = 0 ; lcv < *npages ; lcv++) {
+
+ if (ppsp[lcv] == pg) /* skip "pg" */
+ continue;
+
+ /*
+ * if swap-backed, gain lock on object that owns page. note
+ * that PQ_ANON bit can't change as long as we are holding
+ * the PG_BUSY bit (so there is no need to lock the page
+ * queues to test it).
+ *
+ * once we have the lock, dispose of the pointer to swap, if
+ * requested
+ */
+ if (!uobj) {
+ if (ppsp[lcv]->pqflags & PQ_ANON) {
+ simple_lock(&ppsp[lcv]->uanon->an_lock);
+ if (flags & PGO_REALLOCSWAP)
+ /* zap swap block */
+ ppsp[lcv]->uanon->an_swslot = 0;
+ } else {
+ simple_lock(&ppsp[lcv]->uobject->vmobjlock);
+ if (flags & PGO_REALLOCSWAP)
+ uao_set_swslot(ppsp[lcv]->uobject,
+ ppsp[lcv]->offset >> PAGE_SHIFT, 0);
+ }
+ }
+
+ /* did someone want the page while we had it busy-locked? */
+ if (ppsp[lcv]->flags & PG_WANTED)
+ /* still holding obj lock */
+ thread_wakeup(ppsp[lcv]);
+
+ /* if page was released, release it. otherwise un-busy it */
+ if (ppsp[lcv]->flags & PG_RELEASED) {
+
+ if (ppsp[lcv]->pqflags & PQ_ANON) {
+ /* so that anfree will free */
+ ppsp[lcv]->flags &= ~(PG_BUSY);
+ UVM_PAGE_OWN(ppsp[lcv], NULL);
+
+ pmap_page_protect(PMAP_PGARG(ppsp[lcv]),
+ VM_PROT_NONE); /* be safe */
+ simple_unlock(&ppsp[lcv]->uanon->an_lock);
+ /* kills anon and frees pg */
+ uvm_anfree(ppsp[lcv]->uanon);
+
+ continue;
+ }
+
+ /*
+ * pgo_releasepg will dump the page for us
+ */
+
+#ifdef DIAGNOSTIC
+ if (ppsp[lcv]->uobject->pgops->pgo_releasepg == NULL)
+ panic("uvm_pager_dropcluster: no releasepg "
+ "function");
+#endif
+ saved_uobj = ppsp[lcv]->uobject;
+ obj_is_alive =
+ saved_uobj->pgops->pgo_releasepg(ppsp[lcv], NULL);
+
+#ifdef DIAGNOSTIC
+ /* for normal objects, "pg" is still PG_BUSY by us,
+ * so obj can't die */
+ if (uobj && !obj_is_alive)
+ panic("uvm_pager_dropcluster: object died "
+ "with active page");
+#endif
+ /* only unlock the object if it is still alive... */
+ if (obj_is_alive && saved_uobj != uobj)
+ simple_unlock(&saved_uobj->vmobjlock);
+
+ /*
+			 * XXXCDC: suppose uobj died in the pgo_releasepg?
+			 * how do we pass that info up to the caller?  we
+			 * are currently ignoring it...
+ */
+
+ continue; /* next page */
+
+ } else {
+ ppsp[lcv]->flags &= ~(PG_BUSY|PG_WANTED);
+ UVM_PAGE_OWN(ppsp[lcv], NULL);
+ }
+
+ /*
+ * if we are operating on behalf of the pagedaemon and we
+ * had a successful pageout update the page!
+ */
+ if (flags & PGO_PDFREECLUST) {
+ /* XXX: with PMAP_NEW ref should already be clear,
+ * but don't trust! */
+ pmap_clear_reference(PMAP_PGARG(ppsp[lcv]));
+ pmap_clear_modify(PMAP_PGARG(ppsp[lcv]));
+ ppsp[lcv]->flags |= PG_CLEAN;
+ }
+
+ /* if anonymous cluster, unlock object and move on */
+ if (!uobj) {
+ if (ppsp[lcv]->pqflags & PQ_ANON)
+ simple_unlock(&ppsp[lcv]->uanon->an_lock);
+ else
+ simple_unlock(&ppsp[lcv]->uobject->vmobjlock);
+ }
+
+ }
+
+ /*
+ * drop to a cluster of 1 page ("pg") if requested
+ */
+
+ if (pg && (flags & PGO_PDFREECLUST) == 0) {
+ /*
+ * if we are not a successful pageout, we make a 1 page cluster.
+ */
+ ppsp[0] = pg;
+ *npages = 1;
+
+ /*
+ * assign new swap block to new cluster, if anon backed
+ */
+ if (uobj == NULL && (flags & PGO_REALLOCSWAP)) {
+ if (pg->pqflags & PQ_ANON) {
+ simple_lock(&pg->uanon->an_lock);
+ pg->uanon->an_swslot = swblk; /* reassign */
+ simple_unlock(&pg->uanon->an_lock);
+ } else {
+ simple_lock(&pg->uobject->vmobjlock);
+ uao_set_swslot(pg->uobject,
+ pg->offset >> PAGE_SHIFT, swblk);
+ simple_unlock(&pg->uobject->vmobjlock);
+ }
+ }
+ }
+}
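+
+/*
+ * Illustrative sketch (editor's addition, not part of the imported file):
+ * based on the prototype in uvm_pager.h, a pager "put" routine that built
+ * a cluster and then had its synchronous I/O fail could shed the extra
+ * pages, handing the swap block back to the surviving page, roughly as:
+ *
+ *	uvm_pager_dropcluster(uobj, pg, ppsp, &npages,
+ *	    PGO_REALLOCSWAP, swblk);
+ *
+ * where uobj is NULL for swap-backed clusters.
+ */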
diff --git a/sys/uvm/uvm_pager.h b/sys/uvm/uvm_pager.h
new file mode 100644
index 00000000000..f48082e4b44
--- /dev/null
+++ b/sys/uvm/uvm_pager.h
@@ -0,0 +1,158 @@
+/* $NetBSD: uvm_pager.h,v 1.7 1998/08/13 02:11:03 eeh Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ *
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * from: Id: uvm_pager.h,v 1.1.2.14 1998/01/13 19:00:50 chuck Exp
+ */
+
+#ifndef _UVM_UVM_PAGER_H_
+#define _UVM_UVM_PAGER_H_
+
+/*
+ * uvm_pager.h
+ */
+
+/*
+ * async pager i/o descriptor structure
+ */
+
+TAILQ_HEAD(uvm_aiohead, uvm_aiodesc);
+
+struct uvm_aiodesc {
+ void (*aiodone) __P((struct uvm_aiodesc *));
+ /* aio done function */
+ vaddr_t kva; /* KVA of mapped page(s) */
+ int npages; /* # of pages in I/O req */
+ void *pd_ptr; /* pager-dependent pointer */
+ TAILQ_ENTRY(uvm_aiodesc) aioq; /* linked list of aio's */
+};
+
+/*
+ * pager ops
+ */
+
+struct uvm_pagerops {
+ void (*pgo_init) __P((void));/* init pager */
+ struct uvm_object * (*pgo_attach) /* get uvm_object */
+ __P((void *, vm_prot_t));
+ void (*pgo_reference) /* add reference to obj */
+ __P((struct uvm_object *));
+ void (*pgo_detach) /* drop reference to obj */
+ __P((struct uvm_object *));
+ int (*pgo_fault) /* special nonstd fault fn */
+ __P((struct uvm_faultinfo *, vaddr_t,
+ vm_page_t *, int, int, vm_fault_t,
+ vm_prot_t, int));
+ boolean_t (*pgo_flush) /* flush pages out of obj */
+ __P((struct uvm_object *, vaddr_t,
+ vaddr_t, int));
+ int (*pgo_get) /* get/read page */
+ __P((struct uvm_object *, vaddr_t,
+ vm_page_t *, int *, int, vm_prot_t, int, int));
+ int (*pgo_asyncget) /* start async get */
+ __P((struct uvm_object *, vaddr_t, int));
+ int (*pgo_put) /* put/write page */
+ __P((struct uvm_object *, vm_page_t *,
+ int, boolean_t));
+ void (*pgo_cluster) /* return range of cluster */
+ __P((struct uvm_object *, vaddr_t, vaddr_t *,
+ vaddr_t *));
+ struct vm_page ** (*pgo_mk_pcluster) /* make "put" cluster */
+ __P((struct uvm_object *, struct vm_page **,
+ int *, struct vm_page *, int, vaddr_t,
+ vaddr_t));
+ void (*pgo_shareprot) /* share protect */
+ __P((vm_map_entry_t, vm_prot_t));
+ void (*pgo_aiodone) /* async iodone */
+ __P((struct uvm_aiodesc *));
+ boolean_t (*pgo_releasepg) /* release page */
+ __P((struct vm_page *, struct vm_page **));
+};
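+
+/*
+ * Illustrative sketch (editor's addition, not part of the imported file):
+ * a pager supplies a statically initialized ops vector with entries in
+ * the declaration order above.  "example_*" names are hypothetical;
+ * uvm_mk_pcluster and uvm_shareprot are the generic helpers declared
+ * below, and hooks a pager does not implement may be NULL:
+ *
+ *	static struct uvm_pagerops example_pagerops = {
+ *		example_init, example_attach, example_reference,
+ *		example_detach, NULL, example_flush, example_get,
+ *		NULL, example_put, example_cluster, uvm_mk_pcluster,
+ *		uvm_shareprot, NULL, example_releasepg,
+ *	};
+ */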
+
+/* pager flags [mostly for flush] */
+
+#define PGO_CLEANIT 0x001 /* write dirty pages to backing store */
+#define PGO_SYNCIO 0x002 /* if PGO_CLEAN: use sync I/O? */
+/*
+ * obviously, if neither PGO_DEACTIVATE nor PGO_FREE is set then the pages
+ * stay where they are.
+ */
+#define PGO_DEACTIVATE 0x004 /* deactivate flushed pages */
+#define PGO_FREE 0x008 /* free flushed pages */
+
+#define PGO_ALLPAGES 0x010 /* flush whole object/get all pages */
+#define PGO_DOACTCLUST 0x020 /* flag to mk_pcluster to include active */
+#define PGO_LOCKED 0x040 /* fault data structures are locked [get] */
+#define PGO_PDFREECLUST 0x080 /* daemon's free cluster flag [uvm_pager_put] */
+#define PGO_REALLOCSWAP 0x100 /* reallocate swap area [pager_dropcluster] */
+
+/* page we are not interested in getting */
+#define PGO_DONTCARE ((struct vm_page *) -1) /* [get only] */
+
+/*
+ * handle inline options
+ */
+
+#ifdef UVM_PAGER_INLINE
+#define PAGER_INLINE static __inline
+#else
+#define PAGER_INLINE /* nothing */
+#endif /* UVM_PAGER_INLINE */
+
+/*
+ * prototypes
+ */
+
+void uvm_pager_dropcluster __P((struct uvm_object *,
+ struct vm_page *, struct vm_page **,
+ int *, int, int));
+void uvm_pager_init __P((void));
+int uvm_pager_put __P((struct uvm_object *, struct vm_page *,
+ struct vm_page ***, int *, int,
+ vaddr_t, vaddr_t));
+
+PAGER_INLINE struct vm_page *uvm_pageratop __P((vaddr_t));
+
+vaddr_t uvm_pagermapin __P((struct vm_page **, int,
+ struct uvm_aiodesc **, int));
+void uvm_pagermapout __P((vaddr_t, int));
+struct vm_page **uvm_mk_pcluster __P((struct uvm_object *, struct vm_page **,
+ int *, struct vm_page *, int,
+ vaddr_t, vaddr_t));
+void uvm_shareprot __P((vm_map_entry_t, vm_prot_t));
+
+
+#endif /* _UVM_UVM_PAGER_H_ */
diff --git a/sys/uvm/uvm_pager_i.h b/sys/uvm/uvm_pager_i.h
new file mode 100644
index 00000000000..7e8e8675df7
--- /dev/null
+++ b/sys/uvm/uvm_pager_i.h
@@ -0,0 +1,73 @@
+/* $NetBSD: uvm_pager_i.h,v 1.6 1998/08/13 02:11:03 eeh Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ *
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * from: Id: uvm_pager_i.h,v 1.1.2.2 1997/10/09 23:05:46 chuck Exp
+ */
+
+#ifndef _UVM_UVM_PAGER_I_H_
+#define _UVM_UVM_PAGER_I_H_
+
+/*
+ * uvm_pager_i.h
+ */
+
+/*
+ * inline functions [maybe]
+ */
+
+#if defined(UVM_PAGER_INLINE) || defined(UVM_PAGER)
+
+/*
+ * uvm_pageratop: convert KVAs in the pager map back to their page
+ * structures.
+ */
+
+PAGER_INLINE struct vm_page *
+uvm_pageratop(kva)
+ vaddr_t kva;
+{
+ paddr_t pa;
+
+ pa = pmap_extract(pmap_kernel(), kva);
+ if (pa == 0)
+ panic("uvm_pageratop");
+ return (PHYS_TO_VM_PAGE(pa));
+}
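+
+/*
+ * Illustrative sketch (editor's addition, not part of the imported file):
+ * an async-i/o done routine handed a uvm_aiodesc could recover the
+ * vm_page pointers for its mapped pages before unmapping them, e.g.
+ *
+ *	for (lcv = 0 ; lcv < aio->npages ; lcv++)
+ *		pgs[lcv] = uvm_pageratop(aio->kva + (lcv << PAGE_SHIFT));
+ *	uvm_pagermapout(aio->kva, aio->npages);
+ */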
+
+#endif /* defined(UVM_PAGER_INLINE) || defined(UVM_PAGER) */
+
+#endif /* _UVM_UVM_PAGER_I_H_ */
diff --git a/sys/uvm/uvm_pdaemon.c b/sys/uvm/uvm_pdaemon.c
new file mode 100644
index 00000000000..f1b0fcc327d
--- /dev/null
+++ b/sys/uvm/uvm_pdaemon.c
@@ -0,0 +1,1012 @@
+/* $NetBSD: uvm_pdaemon.c,v 1.12 1998/11/04 07:06:05 chs Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * Copyright (c) 1991, 1993, The Regents of the University of California.
+ *
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor,
+ * Washington University, the University of California, Berkeley and
+ * its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_pageout.c 8.5 (Berkeley) 2/14/94
+ * from: Id: uvm_pdaemon.c,v 1.1.2.32 1998/02/06 05:26:30 chs Exp
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+/*
+ * uvm_pdaemon.c: the page daemon
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/pool.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_kern.h>
+
+#include <uvm/uvm.h>
+
+/*
+ * local prototypes
+ */
+
+static void uvmpd_scan __P((void));
+static boolean_t uvmpd_scan_inactive __P((struct pglist *));
+static void uvmpd_tune __P((void));
+
+
+/*
+ * uvm_wait: wait (sleep) for the page daemon to free some pages
+ *
+ * => should be called with all locks released
+ * => should _not_ be called by the page daemon (to avoid deadlock)
+ */
+
+void
+uvm_wait(wmsg)
+ char *wmsg;
+{
+ int timo = 0;
+ int s = splbio();
+
+ /*
+ * check for page daemon going to sleep (waiting for itself)
+ */
+
+ if (curproc == uvm.pagedaemon_proc) {
+ /*
+ * now we have a problem: the pagedaemon wants to go to
+ * sleep until it frees more memory. but how can it
+ * free more memory if it is asleep? that is a deadlock.
+ * we have two options:
+ * [1] panic now
+ * [2] put a timeout on the sleep, thus causing the
+ * pagedaemon to only pause (rather than sleep forever)
+ *
+ * note that option [2] will only help us if we get lucky
+ * and some other process on the system breaks the deadlock
+ * by exiting or freeing memory (thus allowing the pagedaemon
+ * to continue). for now we panic if DEBUG is defined,
+ * otherwise we hope for the best with option [2] (better
+ * yet, this should never happen in the first place!).
+ */
+
+ printf("pagedaemon: deadlock detected!\n");
+ timo = hz >> 3; /* set timeout */
+#if defined(DEBUG)
+ /* DEBUG: panic so we can debug it */
+ panic("pagedaemon deadlock");
+#endif
+ }
+
+ simple_lock(&uvm.pagedaemon_lock);
+ thread_wakeup(&uvm.pagedaemon); /* wake the daemon! */
+ UVM_UNLOCK_AND_WAIT(&uvmexp.free, &uvm.pagedaemon_lock, FALSE, wmsg,
+ timo);
+
+ splx(s);
+}
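+
+/*
+ * Illustrative sketch (editor's addition, not part of the imported file):
+ * a caller that cannot get memory drops its locks first (per the notes
+ * above) and then sleeps until the daemon frees something, e.g.
+ *
+ *	simple_unlock(&uobj->vmobjlock);	(release all locks held)
+ *	uvm_wait("examplewt");
+ *	(re-lock and retry the allocation)
+ */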
+
+
+/*
+ * uvmpd_tune: tune paging parameters
+ *
+ * => called whenever memory is added to (or removed from?) the system
+ * => caller must call with page queues locked
+ */
+
+static void
+uvmpd_tune()
+{
+ UVMHIST_FUNC("uvmpd_tune"); UVMHIST_CALLED(pdhist);
+
+ uvmexp.freemin = uvmexp.npages / 20;
+
+ /* between 16k and 256k */
+ /* XXX: what are these values good for? */
+ uvmexp.freemin = max(uvmexp.freemin, (16*1024) >> PAGE_SHIFT);
+ uvmexp.freemin = min(uvmexp.freemin, (256*1024) >> PAGE_SHIFT);
+
+ uvmexp.freetarg = (uvmexp.freemin * 4) / 3;
+ if (uvmexp.freetarg <= uvmexp.freemin)
+ uvmexp.freetarg = uvmexp.freemin + 1;
+
+ /* uvmexp.inactarg: computed in main daemon loop */
+
+ uvmexp.wiredmax = uvmexp.npages / 3;
+ UVMHIST_LOG(pdhist, "<- done, freemin=%d, freetarg=%d, wiredmax=%d",
+ uvmexp.freemin, uvmexp.freetarg, uvmexp.wiredmax, 0);
+}
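+
+/*
+ * Worked example (editor's addition, assuming 4 KB pages): with 16384
+ * managed pages (64 MB of RAM), npages/20 gives freemin = 819, which the
+ * clamp above reduces to (256*1024) >> PAGE_SHIFT = 64 pages; freetarg
+ * then becomes (64 * 4) / 3 = 85 pages and wiredmax 16384 / 3 = 5461.
+ */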
+
+/*
+ * uvm_pageout: the main loop for the pagedaemon
+ */
+
+void
+uvm_pageout()
+{
+ int npages = 0;
+ int s;
+ struct uvm_aiodesc *aio, *nextaio;
+ UVMHIST_FUNC("uvm_pageout"); UVMHIST_CALLED(pdhist);
+
+ UVMHIST_LOG(pdhist,"<starting uvm pagedaemon>", 0, 0, 0, 0);
+
+ /*
+ * ensure correct priority and set paging parameters...
+ */
+
+ uvm.pagedaemon_proc = curproc;
+ (void) spl0();
+ uvm_lock_pageq();
+ npages = uvmexp.npages;
+ uvmpd_tune();
+ uvm_unlock_pageq();
+
+ /*
+ * main loop
+ */
+ while (TRUE) {
+
+ /*
+ * carefully attempt to go to sleep (without losing "wakeups"!).
+ * we need splbio because we want to make sure the aio_done list
+ * is totally empty before we go to sleep.
+ */
+
+ s = splbio();
+ simple_lock(&uvm.pagedaemon_lock);
+
+ /*
+ * if we've got done aio's, then bypass the sleep
+ */
+
+ if (uvm.aio_done.tqh_first == NULL) {
+ UVMHIST_LOG(maphist," <<SLEEPING>>",0,0,0,0);
+ UVM_UNLOCK_AND_WAIT(&uvm.pagedaemon,
+ &uvm.pagedaemon_lock, FALSE, "daemon_slp", 0);
+ uvmexp.pdwoke++;
+ UVMHIST_LOG(pdhist," <<WOKE UP>>",0,0,0,0);
+
+ /* relock pagedaemon_lock, still at splbio */
+ simple_lock(&uvm.pagedaemon_lock);
+ }
+
+ /*
+ * check for done aio structures
+ */
+
+ aio = uvm.aio_done.tqh_first; /* save current list (if any)*/
+ if (aio) {
+ TAILQ_INIT(&uvm.aio_done); /* zero global list */
+ }
+
+ simple_unlock(&uvm.pagedaemon_lock); /* unlock */
+ splx(s); /* drop splbio */
+
+ /*
+ * first clear out any pending aios (to free space in case we
+ * want to pageout more stuff).
+ */
+
+ for (/*null*/; aio != NULL ; aio = nextaio) {
+
+ uvmexp.paging -= aio->npages;
+ nextaio = aio->aioq.tqe_next;
+ aio->aiodone(aio);
+
+ }
+
+ /* Next, drain pool resources */
+ pool_drain(0);
+
+ /*
+ * now lock page queues and recompute inactive count
+ */
+ uvm_lock_pageq();
+
+ if (npages != uvmexp.npages) { /* check for new pages? */
+ npages = uvmexp.npages;
+ uvmpd_tune();
+ }
+
+ uvmexp.inactarg = (uvmexp.active + uvmexp.inactive) / 3;
+ if (uvmexp.inactarg <= uvmexp.freetarg)
+ uvmexp.inactarg = uvmexp.freetarg + 1;
+
+ UVMHIST_LOG(pdhist," free/ftarg=%d/%d, inact/itarg=%d/%d",
+ uvmexp.free, uvmexp.freetarg, uvmexp.inactive,
+ uvmexp.inactarg);
+
+ /*
+ * scan if needed
+		 * [XXX: note we are reading uvmexp.free without locking]
+ */
+ if (uvmexp.free < uvmexp.freetarg ||
+ uvmexp.inactive < uvmexp.inactarg)
+ uvmpd_scan();
+
+ /*
+ * done scan. unlock page queues (the only lock we are holding)
+ */
+ uvm_unlock_pageq();
+
+ /*
+ * done! restart loop.
+ */
+ thread_wakeup(&uvmexp.free);
+ }
+ /*NOTREACHED*/
+}
+
+/*
+ * uvmpd_scan_inactive: the first loop of uvmpd_scan broken out into
+ * its own function for ease of reading.
+ *
+ * => called with page queues locked
+ * => we work on meeting our free target by converting inactive pages
+ * into free pages.
+ * => we handle the building of swap-backed clusters
+ * => we return TRUE if we are exiting because we met our target
+ */
+
+static boolean_t
+uvmpd_scan_inactive(pglst)
+ struct pglist *pglst;
+{
+ boolean_t retval = FALSE; /* assume we haven't hit target */
+ int s, free, result;
+ struct vm_page *p, *nextpg;
+ struct uvm_object *uobj;
+ struct vm_page *pps[MAXBSIZE >> PAGE_SHIFT], **ppsp;
+ int npages;
+ struct vm_page *swpps[MAXBSIZE >> PAGE_SHIFT]; /* XXX: see below */
+ int swnpages, swcpages; /* XXX: see below */
+ int swslot, oldslot;
+ struct vm_anon *anon;
+ boolean_t swap_backed;
+ vaddr_t start;
+ UVMHIST_FUNC("uvmpd_scan_inactive"); UVMHIST_CALLED(pdhist);
+
+ /*
+	 * note: we currently keep swap-backed pages on a separate inactive
+ * list from object-backed pages. however, merging the two lists
+ * back together again hasn't been ruled out. thus, we keep our
+ * swap cluster in "swpps" rather than in pps (allows us to mix
+ * clustering types in the event of a mixed inactive queue).
+ */
+
+ /*
+ * swslot is non-zero if we are building a swap cluster. we want
+ * to stay in the loop while we have a page to scan or we have
+ * a swap-cluster to build.
+ */
+ swslot = 0;
+ swnpages = swcpages = 0;
+ free = 0;
+
+ for (p = pglst->tqh_first ; p != NULL || swslot != 0 ; p = nextpg) {
+
+ /*
+ * note that p can be NULL iff we have traversed the whole
+ * list and need to do one final swap-backed clustered pageout.
+ */
+ if (p) {
+ /*
+ * update our copy of "free" and see if we've met
+ * our target
+ */
+ s = splimp();
+ uvm_lock_fpageq();
+ free = uvmexp.free;
+ uvm_unlock_fpageq();
+ splx(s);
+
+ if (free >= uvmexp.freetarg) {
+ UVMHIST_LOG(pdhist," met free target: "
+ "exit loop", 0, 0, 0, 0);
+ retval = TRUE; /* hit the target! */
+
+ if (swslot == 0)
+ /* exit now if no swap-i/o pending */
+ break;
+
+ /* set p to null to signal final swap i/o */
+ p = NULL;
+ }
+ }
+
+ uobj = NULL; /* be safe and shut gcc up */
+ anon = NULL; /* be safe and shut gcc up */
+
+ if (p) { /* if (we have a new page to consider) */
+ /*
+ * we are below target and have a new page to consider.
+ */
+ uvmexp.pdscans++;
+ nextpg = p->pageq.tqe_next;
+
+ /*
+ * move referenced pages back to active queue and
+ * skip to next page (unlikely to happen since
+ * inactive pages shouldn't have any valid mappings
+ * and we cleared reference before deactivating).
+ */
+ if (pmap_is_referenced(PMAP_PGARG(p))) {
+ uvm_pageactivate(p);
+ uvmexp.pdreact++;
+ continue;
+ }
+
+ /*
+ * first we attempt to lock the object that this page
+ * belongs to. if our attempt fails we skip on to
+ * the next page (no harm done). it is important to
+ * "try" locking the object as we are locking in the
+ * wrong order (pageq -> object) and we don't want to
+ * get deadlocked.
+ *
+			 * the only time we expect to see an ownerless page
+ * (i.e. a page with no uobject and !PQ_ANON) is if an
+ * anon has loaned a page from a uvm_object and the
+ * uvm_object has dropped the ownership. in that
+ * case, the anon can "take over" the loaned page
+ * and make it its own.
+ */
+
+ /* is page part of an anon or ownerless ? */
+ if ((p->pqflags & PQ_ANON) || p->uobject == NULL) {
+
+ anon = p->uanon;
+
+#ifdef DIAGNOSTIC
+ /* to be on inactive q, page must be part
+ * of _something_ */
+ if (anon == NULL)
+ panic("pagedaemon: page with no anon "
+ "or object detected - loop 1");
+#endif
+
+ if (!simple_lock_try(&anon->an_lock))
+ /* lock failed, skip this page */
+ continue;
+
+ /*
+ * if the page is ownerless, claim it in the
+ * name of "anon"!
+ */
+ if ((p->pqflags & PQ_ANON) == 0) {
+#ifdef DIAGNOSTIC
+ if (p->loan_count < 1)
+ panic("pagedaemon: non-loaned "
+ "ownerless page detected -"
+ " loop 1");
+#endif
+ p->loan_count--;
+ p->pqflags |= PQ_ANON; /* anon now owns it */
+ }
+
+ if (p->flags & PG_BUSY) {
+ simple_unlock(&anon->an_lock);
+ uvmexp.pdbusy++;
+ /* someone else owns page, skip it */
+ continue;
+ }
+
+ uvmexp.pdanscan++;
+
+ } else {
+
+ uobj = p->uobject;
+
+ if (!simple_lock_try(&uobj->vmobjlock))
+ /* lock failed, skip this page */
+ continue;
+
+ if (p->flags & PG_BUSY) {
+ simple_unlock(&uobj->vmobjlock);
+ uvmexp.pdbusy++;
+ /* someone else owns page, skip it */
+ continue;
+ }
+
+ uvmexp.pdobscan++;
+ }
+
+ /*
+ * we now have the object and the page queues locked.
+ * the page is not busy. if the page is clean we
+ * can free it now and continue.
+ */
+
+ if (p->flags & PG_CLEAN) {
+ /* zap all mappings with pmap_page_protect... */
+ pmap_page_protect(PMAP_PGARG(p), VM_PROT_NONE);
+ uvm_pagefree(p);
+ uvmexp.pdfreed++;
+
+ if (anon) {
+#ifdef DIAGNOSTIC
+ /*
+ * an anonymous page can only be clean
+ * if it has valid backing store.
+ */
+ if (anon->an_swslot == 0)
+ panic("pagedaemon: clean anon "
+ "page without backing store?");
+#endif
+ /* remove from object */
+ anon->u.an_page = NULL;
+ simple_unlock(&anon->an_lock);
+ } else {
+ /* pagefree has already removed the
+ * page from the object */
+ simple_unlock(&uobj->vmobjlock);
+ }
+ continue;
+ }
+
+ /*
+ * this page is dirty, skip it if we'll have met our
+ * free target when all the current pageouts complete.
+ */
+ if (free + uvmexp.paging > uvmexp.freetarg)
+ {
+ if (anon) {
+ simple_unlock(&anon->an_lock);
+ } else {
+ simple_unlock(&uobj->vmobjlock);
+ }
+ continue;
+ }
+
+ /*
+ * the page we are looking at is dirty. we must
+ * clean it before it can be freed. to do this we
+ * first mark the page busy so that no one else will
+ * touch the page. we write protect all the mappings
+ * of the page so that no one touches it while it is
+ * in I/O.
+ */
+
+ swap_backed = ((p->pqflags & PQ_SWAPBACKED) != 0);
+ p->flags |= PG_BUSY; /* now we own it */
+ UVM_PAGE_OWN(p, "scan_inactive");
+ pmap_page_protect(PMAP_PGARG(p), VM_PROT_READ);
+ uvmexp.pgswapout++;
+
+ /*
+ * for swap-backed pages we need to (re)allocate
+ * swap space.
+ */
+ if (swap_backed) {
+
+ /*
+ * free old swap slot (if any)
+ */
+ if (anon) {
+ if (anon->an_swslot) {
+ uvm_swap_free(anon->an_swslot,
+ 1);
+ anon->an_swslot = 0;
+ }
+ } else {
+ oldslot = uao_set_swslot(uobj,
+ p->offset >> PAGE_SHIFT, 0);
+
+ if (oldslot)
+ uvm_swap_free(oldslot, 1);
+ }
+
+ /*
+ * start new cluster (if necessary)
+ */
+ if (swslot == 0) {
+ /* want this much */
+ swnpages = MAXBSIZE >> PAGE_SHIFT;
+
+ swslot = uvm_swap_alloc(&swnpages,
+ TRUE);
+
+ if (swslot == 0) {
+ /* no swap? give up! */
+ p->flags &= ~PG_BUSY;
+ UVM_PAGE_OWN(p, NULL);
+ if (anon)
+ simple_unlock(
+ &anon->an_lock);
+ else
+ simple_unlock(
+ &uobj->vmobjlock);
+ continue;
+ }
+ swcpages = 0; /* cluster is empty */
+ }
+
+ /*
+ * add block to cluster
+ */
+ swpps[swcpages] = p;
+ uvmexp.pgswapout++;
+ if (anon)
+ anon->an_swslot = swslot + swcpages;
+ else
+ uao_set_swslot(uobj,
+ p->offset >> PAGE_SHIFT,
+ swslot + swcpages);
+ swcpages++;
+
+ /* done (swap-backed) */
+ }
+
+ /* end: if (p) ["if we have new page to consider"] */
+ } else {
+
+ /* if p == NULL we must be doing a last swap i/o */
+ swap_backed = TRUE;
+ }
+
+ /*
+ * now consider doing the pageout.
+ *
+ * for swap-backed pages, we do the pageout if we have either
+		 * filled the cluster (in which case swnpages == swcpages) or
+ * run out of pages (p == NULL).
+ *
+ * for object pages, we always do the pageout.
+ */
+ if (swap_backed) {
+
+ if (p) { /* if we just added a page to cluster */
+ if (anon)
+ simple_unlock(&anon->an_lock);
+ else
+ simple_unlock(&uobj->vmobjlock);
+
+ /* cluster not full yet? */
+ if (swcpages < swnpages)
+ continue;
+ }
+
+ /* starting I/O now... set up for it */
+ npages = swcpages;
+ ppsp = swpps;
+ /* for swap-backed pages only */
+ start = (vaddr_t) swslot;
+
+ /* if this is final pageout we could have a few
+ * extra swap blocks */
+ if (swcpages < swnpages) {
+ uvm_swap_free(swslot + swcpages,
+ (swnpages - swcpages));
+ }
+
+ } else {
+
+ /* normal object pageout */
+ ppsp = pps;
+ npages = sizeof(pps) / sizeof(struct vm_page *);
+ /* not looked at because PGO_ALLPAGES is set */
+ start = 0;
+
+ }
+
+ /*
+ * now do the pageout.
+ *
+ * for swap_backed pages we have already built the cluster.
+ * for !swap_backed pages, uvm_pager_put will call the object's
+ * "make put cluster" function to build a cluster on our behalf.
+ *
+ * we pass the PGO_PDFREECLUST flag to uvm_pager_put to instruct
+ * it to free the cluster pages for us on a successful I/O (it
+ * always does this for un-successful I/O requests). this
+ * allows us to do clustered pageout without having to deal
+ * with cluster pages at this level.
+ *
+ * note locking semantics of uvm_pager_put with PGO_PDFREECLUST:
+ * IN: locked: uobj (if !swap_backed), page queues
+ * OUT: locked: uobj (if !swap_backed && result !=VM_PAGER_PEND)
+ * !locked: pageqs, uobj (if swap_backed || VM_PAGER_PEND)
+ *
+ * [the bit about VM_PAGER_PEND saves us one lock-unlock pair]
+ */
+
+ /* locked: uobj (if !swap_backed), page queues */
+ uvmexp.pdpageouts++;
+ result = uvm_pager_put((swap_backed) ? NULL : uobj, p,
+ &ppsp, &npages, PGO_ALLPAGES|PGO_PDFREECLUST, start, 0);
+ /* locked: uobj (if !swap_backed && result != PEND) */
+ /* unlocked: pageqs, object (if swap_backed ||result == PEND) */
+
+ /*
+ * if we did i/o to swap, zero swslot to indicate that we are
+ * no longer building a swap-backed cluster.
+ */
+
+ if (swap_backed)
+ swslot = 0; /* done with this cluster */
+
+ /*
+ * first, we check for VM_PAGER_PEND which means that the
+ * async I/O is in progress and the async I/O done routine
+ * will clean up after us. in this case we move on to the
+ * next page.
+ *
+ * there is a very remote chance that the pending async i/o can
+ * finish _before_ we get here. if that happens, our page "p"
+ * may no longer be on the inactive queue. so we verify this
+ * when determining the next page (starting over at the head if
+ * we've lost our inactive page).
+ */
+
+ if (result == VM_PAGER_PEND) {
+ uvmexp.paging += npages;
+ uvm_lock_pageq(); /* relock page queues */
+ uvmexp.pdpending++;
+ if (p) {
+ if (p->pqflags & PQ_INACTIVE)
+ /* reload! */
+ nextpg = p->pageq.tqe_next;
+ else
+ /* reload! */
+ nextpg = pglst->tqh_first;
+ } else {
+ nextpg = NULL; /* done list */
+ }
+ continue;
+ }
+
+ /*
+ * clean up "p" if we have one
+ */
+
+ if (p) {
+ /*
+ * the I/O request to "p" is done and uvm_pager_put
+ * has freed any cluster pages it may have allocated
+ * during I/O. all that is left for us to do is
+ * clean up page "p" (which is still PG_BUSY).
+ *
+ * our result could be one of the following:
+ * VM_PAGER_OK: successful pageout
+ *
+ * VM_PAGER_AGAIN: tmp resource shortage, we skip
+ * to next page
+ * VM_PAGER_{FAIL,ERROR,BAD}: an error. we
+ * "reactivate" page to get it out of the way (it
+ * will eventually drift back into the inactive
+ * queue for a retry).
+ * VM_PAGER_UNLOCK: should never see this as it is
+ * only valid for "get" operations
+ */
+
+			/* relock p's object: page queues not locked yet, so
+ * no need for "try" */
+
+ /* !swap_backed case: already locked... */
+ if (swap_backed) {
+ if (anon)
+ simple_lock(&anon->an_lock);
+ else
+ simple_lock(&uobj->vmobjlock);
+ }
+
+#ifdef DIAGNOSTIC
+ if (result == VM_PAGER_UNLOCK)
+ panic("pagedaemon: pageout returned "
+ "invalid 'unlock' code");
+#endif
+
+ /* handle PG_WANTED now */
+ if (p->flags & PG_WANTED)
+ /* still holding object lock */
+ thread_wakeup(p);
+
+ p->flags &= ~(PG_BUSY|PG_WANTED);
+ UVM_PAGE_OWN(p, NULL);
+
+ /* released during I/O? */
+ if (p->flags & PG_RELEASED) {
+ if (anon) {
+ /* remove page so we can get nextpg */
+ anon->u.an_page = NULL;
+
+ simple_unlock(&anon->an_lock);
+ uvm_anfree(anon); /* kills anon */
+ pmap_page_protect(PMAP_PGARG(p),
+ VM_PROT_NONE);
+ anon = NULL;
+ uvm_lock_pageq();
+ nextpg = p->pageq.tqe_next;
+ /* free released page */
+ uvm_pagefree(p);
+
+ } else {
+
+#ifdef DIAGNOSTIC
+ if (uobj->pgops->pgo_releasepg == NULL)
+ panic("pagedaemon: no "
+ "pgo_releasepg function");
+#endif
+
+ /*
+ * pgo_releasepg nukes the page and
+ * gets "nextpg" for us. it returns
+ * with the page queues locked (when
+ * given nextpg ptr).
+ */
+ if (!uobj->pgops->pgo_releasepg(p,
+ &nextpg))
+ /* uobj died after release */
+ uobj = NULL;
+
+ /*
+ * lock page queues here so that they're
+ * always locked at the end of the loop.
+ */
+ uvm_lock_pageq();
+ }
+
+ } else { /* page was not released during I/O */
+
+ uvm_lock_pageq();
+ nextpg = p->pageq.tqe_next;
+
+ if (result != VM_PAGER_OK) {
+
+ /* pageout was a failure... */
+ if (result != VM_PAGER_AGAIN)
+ uvm_pageactivate(p);
+ pmap_clear_reference(PMAP_PGARG(p));
+ /* XXXCDC: if (swap_backed) FREE p's
+ * swap block? */
+
+ } else {
+
+ /* pageout was a success... */
+ pmap_clear_reference(PMAP_PGARG(p));
+ pmap_clear_modify(PMAP_PGARG(p));
+ p->flags |= PG_CLEAN;
+ /* XXX: could free page here, but old
+ * pagedaemon does not */
+
+ }
+ }
+
+ /*
+ * drop object lock (if there is an object left). do
+ * a safety check of nextpg to make sure it is on the
+ * inactive queue (it should be since PG_BUSY pages on
+ * the inactive queue can't be re-queued [note: not
+ * true for active queue]).
+ */
+
+ if (anon)
+ simple_unlock(&anon->an_lock);
+ else if (uobj)
+ simple_unlock(&uobj->vmobjlock);
+
+ } /* if (p) */ else {
+
+ /* if p is null in this loop, make sure it stays null
+ * in next loop */
+ nextpg = NULL;
+
+ /*
+ * lock page queues here just so they're always locked
+ * at the end of the loop.
+ */
+ uvm_lock_pageq();
+ }
+
+ if (nextpg && (nextpg->pqflags & PQ_INACTIVE) == 0) {
+ printf("pagedaemon: invalid nextpg! reverting to "
+ "queue head\n");
+ nextpg = pglst->tqh_first; /* reload! */
+ }
+
+ } /* end of "inactive" 'for' loop */
+ return (retval);
+}
+
+/*
+ * uvmpd_scan: scan the page queues and attempt to meet our targets.
+ *
+ * => called with pageq's locked
+ */
+
+void
+uvmpd_scan()
+{
+ int s, free, pages_freed, page_shortage;
+ struct vm_page *p, *nextpg;
+ struct uvm_object *uobj;
+ boolean_t got_it;
+ UVMHIST_FUNC("uvmpd_scan"); UVMHIST_CALLED(pdhist);
+
+ uvmexp.pdrevs++; /* counter */
+
+#ifdef __GNUC__
+ uobj = NULL; /* XXX gcc */
+#endif
+ /*
+ * get current "free" page count
+ */
+ s = splimp();
+ uvm_lock_fpageq();
+ free = uvmexp.free;
+ uvm_unlock_fpageq();
+ splx(s);
+
+#ifndef __SWAP_BROKEN
+ /*
+ * swap out some processes if we are below our free target.
+ * we need to unlock the page queues for this.
+ */
+ if (free < uvmexp.freetarg) {
+
+ uvmexp.pdswout++;
+ UVMHIST_LOG(pdhist," free %d < target %d: swapout", free,
+ uvmexp.freetarg, 0, 0);
+ uvm_unlock_pageq();
+ uvm_swapout_threads();
+ pmap_update(); /* update so we can scan inactive q */
+ uvm_lock_pageq();
+
+ }
+#endif
+
+ /*
+ * now we want to work on meeting our targets. first we work on our
+ * free target by converting inactive pages into free pages. then
+ * we work on meeting our inactive target by converting active pages
+ * to inactive ones.
+ */
+
+ UVMHIST_LOG(pdhist, " starting 'free' loop",0,0,0,0);
+ pages_freed = uvmexp.pdfreed; /* so far... */
+
+ /*
+ * do loop #1! alternate starting queue between swap and object based
+ * on the low bit of uvmexp.pdrevs (which we bump by one each call).
+ */
+
+ got_it = FALSE;
+ if ((uvmexp.pdrevs & 1) != 0 && uvmexp.nswapdev != 0)
+ got_it = uvmpd_scan_inactive(&uvm.page_inactive_swp);
+ if (!got_it)
+ got_it = uvmpd_scan_inactive(&uvm.page_inactive_obj);
+ if (!got_it && (uvmexp.pdrevs & 1) == 0 && uvmexp.nswapdev != 0)
+ (void) uvmpd_scan_inactive(&uvm.page_inactive_swp);
+
+ /*
+ * we have done the scan to get free pages. now we work on meeting
+ * our inactive target.
+ */
+
+ page_shortage = uvmexp.inactarg - uvmexp.inactive;
+ pages_freed = uvmexp.pdfreed - pages_freed; /* # pages freed in loop */
+ if (page_shortage <= 0 && pages_freed == 0)
+ page_shortage = 1;
+
+ UVMHIST_LOG(pdhist, " second loop: page_shortage=%d", page_shortage,
+ 0, 0, 0);
+ for (p = uvm.page_active.tqh_first ;
+ p != NULL && page_shortage > 0 ; p = nextpg) {
+ nextpg = p->pageq.tqe_next;
+ if (p->flags & PG_BUSY)
+ continue; /* quick check before trying to lock */
+
+ /*
+ * lock owner
+ */
+ /* is page anon owned or ownerless? */
+ if ((p->pqflags & PQ_ANON) || p->uobject == NULL) {
+
+#ifdef DIAGNOSTIC
+ if (p->uanon == NULL)
+ panic("pagedaemon: page with no anon or "
+ "object detected - loop 2");
+#endif
+
+ if (!simple_lock_try(&p->uanon->an_lock))
+ continue;
+
+ /* take over the page? */
+ if ((p->pqflags & PQ_ANON) == 0) {
+
+#ifdef DIAGNOSTIC
+ if (p->loan_count < 1)
+ panic("pagedaemon: non-loaned "
+ "ownerless page detected - loop 2");
+#endif
+
+ p->loan_count--;
+ p->pqflags |= PQ_ANON;
+ }
+
+ } else {
+
+ if (!simple_lock_try(&p->uobject->vmobjlock))
+ continue;
+
+ }
+
+ if ((p->flags & PG_BUSY) == 0) {
+ pmap_page_protect(PMAP_PGARG(p), VM_PROT_NONE);
+ /* no need to check wire_count as pg is "active" */
+ uvm_pagedeactivate(p);
+ uvmexp.pddeact++;
+ page_shortage--;
+ }
+
+ if (p->pqflags & PQ_ANON)
+ simple_unlock(&p->uanon->an_lock);
+ else
+ simple_unlock(&p->uobject->vmobjlock);
+ }
+
+ /*
+ * done scan
+ */
+}
diff --git a/sys/uvm/uvm_pdaemon.h b/sys/uvm/uvm_pdaemon.h
new file mode 100644
index 00000000000..c5aad80ef81
--- /dev/null
+++ b/sys/uvm/uvm_pdaemon.h
@@ -0,0 +1,86 @@
+/* $NetBSD: uvm_pdaemon.h,v 1.5 1998/02/10 14:12:28 mrg Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * Copyright (c) 1991, 1993, The Regents of the University of California.
+ *
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * The Mach Operating System project at Carnegie-Mellon University.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor,
+ * Washington University, the University of California, Berkeley and
+ * its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vm_pageout.h 8.2 (Berkeley) 1/12/94
+ * from: Id: uvm_pdaemon.h,v 1.1.2.4 1998/02/02 20:07:20 chuck Exp
+ *
+ *
+ * Copyright (c) 1987, 1990 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+#ifndef _UVM_UVM_PDAEMON_H_
+#define _UVM_UVM_PDAEMON_H_
+
+/*
+ * uvm_pdaemon.h: page daemon hooks
+ */
+
+/*
+ * prototypes
+ */
+
+void uvm_wait __P((char *));
+
+#endif /* _UVM_UVM_PDAEMON_H_ */
diff --git a/sys/uvm/uvm_pglist.c b/sys/uvm/uvm_pglist.c
new file mode 100644
index 00000000000..d7fe645ebeb
--- /dev/null
+++ b/sys/uvm/uvm_pglist.c
@@ -0,0 +1,292 @@
+/* $NetBSD: uvm_pglist.c,v 1.6 1998/08/13 02:11:03 eeh Exp $ */
+
+#define VM_PAGE_ALLOC_MEMORY_STATS
+
+/*-
+ * Copyright (c) 1997 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
+ * NASA Ames Research Center.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the NetBSD
+ * Foundation, Inc. and its contributors.
+ * 4. Neither the name of The NetBSD Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * uvm_pglist.c: pglist functions
+ *
+ * XXX: was part of uvm_page but has an incompatible copyright so it
+ * gets its own file now.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_kern.h>
+
+#include <uvm/uvm.h>
+
+#ifdef VM_PAGE_ALLOC_MEMORY_STATS
+#define STAT_INCR(v) (v)++
+#define STAT_DECR(v) do { \
+ if ((v) == 0) \
+ printf("%s:%d -- Already 0!\n", __FILE__, __LINE__); \
+ else \
+ (v)--; \
+ } while (0)
+u_long uvm_pglistalloc_npages;
+#else
+#define STAT_INCR(v)
+#define STAT_DECR(v)
+#endif
+
+/*
+ * uvm_pglistalloc: allocate a list of pages
+ *
+ * => allocated pages are placed at the tail of rlist. rlist is
+ * assumed to be properly initialized by caller.
+ * => returns 0 on success or errno on failure
+ * => XXX: implementation allocates only a single segment, also
+ *	might be able to take better advantage of vm_physseg[].
+ * => doesn't take into account clean non-busy pages on inactive list
+ * that could be used(?)
+ * => params:
+ * size the size of the allocation, rounded to page size.
+ * low the low address of the allowed allocation range.
+ * high the high address of the allowed allocation range.
+ * alignment memory must be aligned to this power-of-two boundary.
+ * boundary no segment in the allocation may cross this
+ * power-of-two boundary (relative to zero).
+ */
+
+int
+uvm_pglistalloc(size, low, high, alignment, boundary, rlist, nsegs, waitok)
+ psize_t size;
+ paddr_t low, high, alignment, boundary;
+ struct pglist *rlist;
+ int nsegs, waitok;
+{
+ paddr_t try, idxpa, lastidxpa;
+ int psi;
+ struct vm_page *pgs;
+ int s, tryidx, idx, end, error, free_list;
+ vm_page_t m;
+ u_long pagemask;
+#ifdef DEBUG
+ vm_page_t tp;
+#endif
+
+#ifdef DIAGNOSTIC
+ if ((alignment & (alignment - 1)) != 0)
+ panic("vm_page_alloc_memory: alignment must be power of 2");
+
+ if ((boundary & (boundary - 1)) != 0)
+ panic("vm_page_alloc_memory: boundary must be power of 2");
+#endif
+
+ /*
+ * Our allocations are always page granularity, so our alignment
+ * must be, too.
+ */
+ if (alignment < PAGE_SIZE)
+ alignment = PAGE_SIZE;
+
+ size = round_page(size);
+ try = roundup(low, alignment);
+
+ if (boundary != 0 && boundary < size)
+ return (EINVAL);
+
+ pagemask = ~(boundary - 1);
+
+ /* Default to "lose". */
+ error = ENOMEM;
+
+ /*
+ * Block all memory allocation and lock the free list.
+ */
+ s = splimp();
+ uvm_lock_fpageq(); /* lock free page queue */
+
+ /* Are there even any free pages? */
+ for (idx = 0; idx < VM_NFREELIST; idx++)
+ if (uvm.page_free[idx].tqh_first != NULL)
+ break;
+ if (idx == VM_NFREELIST)
+ goto out;
+
+ for (;; try += alignment) {
+ if (try + size > high) {
+ /*
+ * We've run past the allowable range.
+ */
+ goto out;
+ }
+
+ /*
+ * Make sure this is a managed physical page.
+ */
+
+ if ((psi = vm_physseg_find(atop(try), &idx)) == -1)
+ continue; /* managed? */
+ if (vm_physseg_find(atop(try + size), NULL) != psi)
+ continue; /* end must be in this segment */
+
+ tryidx = idx;
+ end = idx + (size / PAGE_SIZE);
+ pgs = vm_physmem[psi].pgs;
+
+ /*
+		 * Found a suitable starting page.  See if the range is free.
+ */
+ for (; idx < end; idx++) {
+ if (VM_PAGE_IS_FREE(&pgs[idx]) == 0) {
+ /*
+ * Page not available.
+ */
+ break;
+ }
+
+ idxpa = VM_PAGE_TO_PHYS(&pgs[idx]);
+
+ if (idx > tryidx) {
+ lastidxpa = VM_PAGE_TO_PHYS(&pgs[idx - 1]);
+
+ if ((lastidxpa + PAGE_SIZE) != idxpa) {
+ /*
+ * Region not contiguous.
+ */
+ break;
+ }
+ if (boundary != 0 &&
+ ((lastidxpa ^ idxpa) & pagemask) != 0) {
+ /*
+ * Region crosses boundary.
+ */
+ break;
+ }
+ }
+ }
+
+ if (idx == end) {
+ /*
+ * Woo hoo! Found one.
+ */
+ break;
+ }
+ }
+
+ /*
+ * we have a chunk of memory that conforms to the requested constraints.
+ */
+ idx = tryidx;
+ while (idx < end) {
+ m = &pgs[idx];
+ free_list = uvm_page_lookup_freelist(m);
+#ifdef DEBUG
+ for (tp = uvm.page_free[free_list].tqh_first;
+ tp != NULL; tp = tp->pageq.tqe_next) {
+ if (tp == m)
+ break;
+ }
+ if (tp == NULL)
+ panic("uvm_pglistalloc: page not on freelist");
+#endif
+ TAILQ_REMOVE(&uvm.page_free[free_list], m, pageq);
+ uvmexp.free--;
+ m->flags = PG_CLEAN;
+ m->pqflags = 0;
+ m->uobject = NULL;
+ m->uanon = NULL;
+ m->wire_count = 0;
+ m->loan_count = 0;
+ TAILQ_INSERT_TAIL(rlist, m, pageq);
+ idx++;
+ STAT_INCR(uvm_pglistalloc_npages);
+ }
+ error = 0;
+
+out:
+ uvm_unlock_fpageq();
+ splx(s);
+
+ /*
+	 * check to see if we need to generate some free pages by waking
+	 * the pagedaemon.
+	 * XXX: we read uvmexp.free without locking
+ */
+
+ if (uvmexp.free < uvmexp.freemin ||
+ (uvmexp.free < uvmexp.freetarg &&
+ uvmexp.inactive < uvmexp.inactarg))
+ thread_wakeup(&uvm.pagedaemon);
+
+ return (error);
+}
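+
+/*
+ * Illustrative sketch (editor's addition, not part of the imported file):
+ * a driver needing four physically contiguous, page-aligned pages below
+ * 16 MB for DMA might call (values are hypothetical):
+ *
+ *	struct pglist mlist;
+ *	int error;
+ *
+ *	TAILQ_INIT(&mlist);
+ *	error = uvm_pglistalloc(4 * PAGE_SIZE, 0, 0x1000000,
+ *	    PAGE_SIZE, 0, &mlist, 1, 0);
+ *
+ * and later return the pages with uvm_pglistfree(&mlist).
+ */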
+
+/*
+ * uvm_pglistfree: free a list of pages
+ *
+ * => pages should already be unmapped
+ */
+
+void
+uvm_pglistfree(list)
+ struct pglist *list;
+{
+ vm_page_t m;
+ int s;
+
+ /*
+ * Block all memory allocation and lock the free list.
+ */
+ s = splimp();
+ uvm_lock_fpageq();
+
+ while ((m = list->tqh_first) != NULL) {
+#ifdef DIAGNOSTIC
+ if (m->pqflags & (PQ_ACTIVE|PQ_INACTIVE))
+ panic("uvm_pglistfree: active/inactive page!");
+#endif
+ TAILQ_REMOVE(list, m, pageq);
+ m->pqflags = PQ_FREE;
+ TAILQ_INSERT_TAIL(&uvm.page_free[uvm_page_lookup_freelist(m)],
+ m, pageq);
+ uvmexp.free++;
+ STAT_DECR(uvm_pglistalloc_npages);
+ }
+
+ uvm_unlock_fpageq();
+ splx(s);
+}
diff --git a/sys/uvm/uvm_stat.c b/sys/uvm/uvm_stat.c
new file mode 100644
index 00000000000..fbe3139c116
--- /dev/null
+++ b/sys/uvm/uvm_stat.c
@@ -0,0 +1,253 @@
+/* $NetBSD: uvm_stat.c,v 1.10 1998/06/20 13:19:00 mrg Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ *
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * from: Id: uvm_stat.c,v 1.1.2.3 1997/12/19 15:01:00 mrg Exp
+ */
+
+/*
+ * uvm_stat.c
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <vm/vm.h>
+
+#include <uvm/uvm.h>
+
+/*
+ * globals
+ */
+
+struct uvm_cnt *uvm_cnt_head = NULL;
+
+#ifdef UVMHIST
+struct uvm_history_head uvm_histories;
+#endif
+
+#ifdef UVMHIST_PRINT
+int uvmhist_print_enabled = 1;
+#endif
+
+/*
+ * prototypes
+ */
+
+#ifdef UVMHIST
+void uvmhist_dump __P((struct uvm_history *));
+void uvm_hist __P((u_int32_t));
+static void uvmhist_dump_histories __P((struct uvm_history *[]));
+#endif
+void uvmcnt_dump __P((void));
+void uvm_dump __P((void));
+
+
+#ifdef UVMHIST
+/* call this from ddb */
+void
+uvmhist_dump(l)
+ struct uvm_history *l;
+{
+ int lcv, s;
+
+ s = splhigh();
+ lcv = l->f;
+ do {
+ if (l->e[lcv].fmt)
+ uvmhist_print(&l->e[lcv]);
+ lcv = (lcv + 1) % l->n;
+ } while (lcv != l->f);
+ splx(s);
+}
+
+/*
+ * print a merged list of uvm_history structures
+ */
+static void
+uvmhist_dump_histories(hists)
+ struct uvm_history *hists[];
+{
+ struct timeval tv;
+ int cur[MAXHISTS];
+ int s, lcv, hi;
+
+ /* so we don't get corrupted lists! */
+ s = splhigh();
+
+ /* find the first of each list */
+ for (lcv = 0; hists[lcv]; lcv++)
+ cur[lcv] = hists[lcv]->f;
+
+ /*
+ * here we loop "forever", finding the next earliest
+ * history entry and printing it. cur[X] is the current
+ * entry to test for the history in hists[X]. if it is
+ * -1, then this history is finished.
+ */
+ for (;;) {
+ hi = -1;
+ tv.tv_sec = tv.tv_usec = 0;
+
+ /* loop over each history */
+ for (lcv = 0; hists[lcv]; lcv++) {
+restart:
+ if (cur[lcv] == -1)
+ continue;
+
+ /*
+ * if the format is empty, go to the next entry
+ * and retry.
+ */
+ if (hists[lcv]->e[cur[lcv]].fmt == NULL) {
+ cur[lcv] = (cur[lcv] + 1) % (hists[lcv]->n);
+ if (cur[lcv] == hists[lcv]->f)
+ cur[lcv] = -1;
+ goto restart;
+ }
+
+ /*
+ * if the time hasn't been set yet, or this entry is
+ * earlier than the current tv, set the time and history
+ * index.
+ */
+ if (tv.tv_sec == 0 ||
+ timercmp(&hists[lcv]->e[cur[lcv]].tv, &tv, <)) {
+ tv = hists[lcv]->e[cur[lcv]].tv;
+ hi = lcv;
+ }
+ }
+
+ /* if we didn't find any entries, we must be done */
+ if (hi == -1)
+ break;
+
+ /* print and move to the next entry */
+ uvmhist_print(&hists[hi]->e[cur[hi]]);
+ cur[hi] = (cur[hi] + 1) % (hists[hi]->n);
+ if (cur[hi] == hists[hi]->f)
+ cur[hi] = -1;
+ }
+
+ /* done! */
+ splx(s);
+}
+
+/*
+ * call this from ddb. `bitmask' is from <uvm/uvm_stat.h>. it
+ * merges the named histories.
+ */
+void
+uvm_hist(bitmask)
+ u_int32_t bitmask; /* XXX only support 32 hists */
+{
+ struct uvm_history *hists[MAXHISTS + 1];
+ int i = 0;
+
+ if ((bitmask & UVMHIST_MAPHIST) || bitmask == 0)
+ hists[i++] = &maphist;
+
+ if ((bitmask & UVMHIST_PDHIST) || bitmask == 0)
+ hists[i++] = &pdhist;
+
+ hists[i] = NULL;
+
+ uvmhist_dump_histories(hists);
+}
+#endif /* UVMHIST */
+
+void
+uvmcnt_dump()
+{
+ struct uvm_cnt *uvc = uvm_cnt_head;
+
+	while (uvc) {
+		/* only plain counters are printable; always advance the
+		 * pointer so a non-counter entry can't loop us forever */
+		if ((uvc->t & UVMCNT_MASK) == UVMCNT_CNT)
+			printf("%s = %d\n", uvc->name, uvc->c);
+		uvc = uvc->next;
+	}
+}
+
+/*
+ * uvm_dump: ddb hook to dump interesting uvm counters
+ */
+void
+uvm_dump()
+{
+
+ printf("Current UVM status:\n");
+ printf(" pagesize=%d (0x%x), pagemask=0x%x, pageshift=%d\n",
+ uvmexp.pagesize, uvmexp.pagesize, uvmexp.pagemask,
+ uvmexp.pageshift);
+ printf(" %d VM pages: %d active, %d inactive, %d wired, %d free\n",
+ uvmexp.npages, uvmexp.active, uvmexp.inactive, uvmexp.wired,
+ uvmexp.free);
+ printf(" freemin=%d, free-target=%d, inactive-target=%d, "
+ "wired-max=%d\n", uvmexp.freemin, uvmexp.freetarg, uvmexp.inactarg,
+ uvmexp.wiredmax);
+ printf(" faults=%d, traps=%d, intrs=%d, ctxswitch=%d\n",
+ uvmexp.faults, uvmexp.traps, uvmexp.intrs, uvmexp.swtch);
+ printf(" softint=%d, syscalls=%d, swapins=%d, swapouts=%d\n",
+ uvmexp.softs, uvmexp.syscalls, uvmexp.swapins, uvmexp.swapouts);
+
+ printf(" fault counts:\n");
+ printf(" noram=%d, noanon=%d, pgwait=%d, pgrele=%d\n",
+ uvmexp.fltnoram, uvmexp.fltnoanon, uvmexp.fltpgwait,
+ uvmexp.fltpgrele);
+ printf(" ok relocks(total)=%d(%d), anget(retrys)=%d(%d), "
+ "amapcopy=%d\n", uvmexp.fltrelckok, uvmexp.fltrelck,
+ uvmexp.fltanget, uvmexp.fltanretry, uvmexp.fltamcopy);
+ printf(" neighbor anon/obj pg=%d/%d, gets(lock/unlock)=%d/%d\n",
+ uvmexp.fltnamap, uvmexp.fltnomap, uvmexp.fltlget, uvmexp.fltget);
+ printf(" cases: anon=%d, anoncow=%d, obj=%d, prcopy=%d, przero=%d\n",
+ uvmexp.flt_anon, uvmexp.flt_acow, uvmexp.flt_obj, uvmexp.flt_prcopy,
+ uvmexp.flt_przero);
+
+ printf(" daemon and swap counts:\n");
+ printf(" woke=%d, revs=%d, scans=%d, swout=%d\n", uvmexp.pdwoke,
+ uvmexp.pdrevs, uvmexp.pdscans, uvmexp.pdswout);
+ printf(" busy=%d, freed=%d, reactivate=%d, deactivate=%d\n",
+ uvmexp.pdbusy, uvmexp.pdfreed, uvmexp.pdreact, uvmexp.pddeact);
+ printf(" pageouts=%d, pending=%d, nswget=%d\n", uvmexp.pdpageouts,
+ uvmexp.pdpending, uvmexp.nswget);
+ printf(" nswapdev=%d, nanon=%d, nfreeanon=%d\n", uvmexp.nswapdev,
+ uvmexp.nanon, uvmexp.nfreeanon);
+
+ printf(" kernel pointers:\n");
+ printf(" objs(kern/kmem/mb)=%p/%p/%p\n", uvm.kernel_object,
+ uvmexp.kmem_object, uvmexp.mb_object);
+}
diff --git a/sys/uvm/uvm_stat.h b/sys/uvm/uvm_stat.h
new file mode 100644
index 00000000000..62ce32fe46e
--- /dev/null
+++ b/sys/uvm/uvm_stat.h
@@ -0,0 +1,245 @@
+/* $NetBSD: uvm_stat.h,v 1.13 1998/08/09 22:36:39 perry Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ *
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * from: Id: uvm_stat.h,v 1.1.2.4 1998/02/07 01:16:56 chs Exp
+ */
+
+#ifndef _UVM_UVM_STAT_H_
+#define _UVM_UVM_STAT_H_
+
+#include <sys/queue.h>
+
+/*
+ * uvm_stat: monitor what is going on with uvm (or whatever)
+ */
+
+/*
+ * counters [XXX: maybe replace event counters with this]
+ */
+
+#define UVMCNT_MASK 0xf /* rest are private */
+#define UVMCNT_CNT 0 /* normal counter */
+#define UVMCNT_DEV 1 /* device event counter */
+
+struct uvm_cnt {
+ int c; /* the value */
+ int t; /* type */
+ struct uvm_cnt *next; /* global list of cnts */
+ char *name; /* counter name */
+ void *p; /* private data */
+};
+
+extern struct uvm_cnt *uvm_cnt_head;
+
+/*
+ * counter operations. assume spl is set ok.
+ */
+
+#define UVMCNT_INIT(CNT,TYP,VAL,NAM,PRIV) \
+do { \
+ CNT.c = VAL; \
+ CNT.t = TYP; \
+ CNT.next = uvm_cnt_head; \
+ uvm_cnt_head = &CNT; \
+ CNT.name = NAM; \
+ CNT.p = PRIV; \
+} while (0)
+
+#define UVMCNT_SET(C,V) \
+do { \
+ (C).c = (V); \
+} while (0)
+
+#define UVMCNT_ADD(C,V) \
+do { \
+ (C).c += (V); \
+} while (0)
+
+#define UVMCNT_INCR(C) UVMCNT_ADD(C,1)
+#define UVMCNT_DECR(C) UVMCNT_ADD(C,-1)
+
+
+/*
+ * history/tracing
+ */
+
+struct uvm_history_ent {
+ struct timeval tv; /* time stamp */
+ char *fmt; /* printf format */
+ size_t fmtlen; /* length of printf format */
+ char *fn; /* function name */
+ size_t fnlen; /* length of function name */
+ u_long call; /* function call number */
+ u_long v[4]; /* values */
+};
+
+struct uvm_history {
+ const char *name; /* name of this history */
+ size_t namelen; /* length of name, not including null */
+ LIST_ENTRY(uvm_history) list; /* link on list of all histories */
+ int n; /* number of entries */
+ int f; /* next free one */
+ simple_lock_data_t l; /* lock on this history */
+ struct uvm_history_ent *e; /* the malloc'd entries */
+};
+
+LIST_HEAD(uvm_history_head, uvm_history);
+
+/*
+ * grovelling all the lists at once: we currently do not allow more than
+ * 32 histories to exist, as the way to dump a number of them at once
+ * is by calling uvm_hist() with a bitmask.
+ */
+
+/* this is used to set the size of some arrays */
+#define MAXHISTS 32 /* do not change this! */
+
+/* and these are the bit values of each history */
+#define UVMHIST_MAPHIST 0x00000001 /* maphist */
+#define UVMHIST_PDHIST 0x00000002 /* pdhist */
+
+/*
+ * macros to use the history/tracing code. note that UVMHIST_LOG
+ * must take 4 arguments (even if they are ignored by the format).
+ */
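+/*
+ * typical idiom (a sketch only; "uvm_foo" and the logged values are
+ * hypothetical, pdhist is one of the histories named above):
+ *
+ *	UVMHIST_FUNC("uvm_foo"); UVMHIST_CALLED(pdhist);
+ *	...
+ *	UVMHIST_LOG(pdhist, "vp=%p npages=%d", vp, npages, 0, 0);
+ */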
+#ifndef UVMHIST
+#define UVMHIST_DECL(NAME)
+#define UVMHIST_INIT(NAME,N)
+#define UVMHIST_INIT_STATIC(NAME,BUF)
+#define UVMHIST_LOG(NAME,FMT,A,B,C,D)
+#define UVMHIST_CALLED(NAME)
+#define UVMHIST_FUNC(FNAME)
+#define uvmhist_dump(NAME)
+#else
+extern struct uvm_history_head uvm_histories;
+
+#define UVMHIST_DECL(NAME) struct uvm_history NAME
+
+#define UVMHIST_INIT(NAME,N) \
+do { \
+ (NAME).name = __STRING(NAME); \
+ (NAME).namelen = strlen((NAME).name); \
+ (NAME).n = (N); \
+ (NAME).f = 0; \
+ simple_lock_init(&(NAME).l); \
+ (NAME).e = (struct uvm_history_ent *) \
+ malloc(sizeof(struct uvm_history_ent) * (N), M_TEMP, \
+ M_WAITOK); \
+ bzero((NAME).e, sizeof(struct uvm_history_ent) * (N)); \
+ LIST_INSERT_HEAD(&uvm_histories, &(NAME), list); \
+} while (0)
+
+#define UVMHIST_INIT_STATIC(NAME,BUF) \
+do { \
+ (NAME).name = __STRING(NAME); \
+ (NAME).namelen = strlen((NAME).name); \
+ (NAME).n = sizeof(BUF) / sizeof(struct uvm_history_ent); \
+ (NAME).f = 0; \
+ simple_lock_init(&(NAME).l); \
+ (NAME).e = (struct uvm_history_ent *) (BUF); \
+ bzero((NAME).e, sizeof(struct uvm_history_ent) * (NAME).n); \
+ LIST_INSERT_HEAD(&uvm_histories, &(NAME), list); \
+} while (0)
+
+extern int cold;
+
+#if defined(UVMHIST_PRINT)
+extern int uvmhist_print_enabled;
+#define UVMHIST_PRINTNOW(E) \
+do { \
+ if (uvmhist_print_enabled) { \
+ uvmhist_print(E); \
+ DELAY(100000); \
+ } \
+} while (0)
+#else
+#define UVMHIST_PRINTNOW(E) /* nothing */
+#endif
+
+#define UVMHIST_LOG(NAME,FMT,A,B,C,D) \
+do { \
+ register int i, s = splhigh(); \
+ simple_lock(&(NAME).l); \
+ i = (NAME).f; \
+ (NAME).f = (i + 1) % (NAME).n; \
+ simple_unlock(&(NAME).l); \
+ splx(s); \
+ if (!cold) \
+ microtime(&(NAME).e[i].tv); \
+ (NAME).e[i].fmt = (FMT); \
+ (NAME).e[i].fmtlen = strlen((NAME).e[i].fmt); \
+ (NAME).e[i].fn = _uvmhist_name; \
+ (NAME).e[i].fnlen = strlen((NAME).e[i].fn); \
+ (NAME).e[i].call = _uvmhist_call; \
+ (NAME).e[i].v[0] = (u_long)(A); \
+ (NAME).e[i].v[1] = (u_long)(B); \
+ (NAME).e[i].v[2] = (u_long)(C); \
+ (NAME).e[i].v[3] = (u_long)(D); \
+ UVMHIST_PRINTNOW(&((NAME).e[i])); \
+} while (0)
+
+#define UVMHIST_CALLED(NAME) \
+do { \
+ { \
+ int s = splhigh(); \
+ simple_lock(&(NAME).l); \
+ _uvmhist_call = _uvmhist_cnt++; \
+ simple_unlock(&(NAME).l); \
+ splx(s); \
+ } \
+ UVMHIST_LOG(NAME,"called!", 0, 0, 0, 0); \
+} while (0)
+
+#define UVMHIST_FUNC(FNAME) \
+ static int _uvmhist_cnt = 0; \
+ static char *_uvmhist_name = FNAME; \
+ int _uvmhist_call;
+
+static __inline void uvmhist_print __P((struct uvm_history_ent *));
+
+static __inline void
+uvmhist_print(e)
+ struct uvm_history_ent *e;
+{
+ printf("%06ld.%06ld ", e->tv.tv_sec, e->tv.tv_usec);
+ printf("%s#%ld: ", e->fn, e->call);
+ printf(e->fmt, e->v[0], e->v[1], e->v[2], e->v[3]);
+ printf("\n");
+}
+#endif /* UVMHIST */
+
+#endif /* _UVM_UVM_STAT_H_ */
diff --git a/sys/uvm/uvm_swap.c b/sys/uvm/uvm_swap.c
new file mode 100644
index 00000000000..9fb7611e7a5
--- /dev/null
+++ b/sys/uvm/uvm_swap.c
@@ -0,0 +1,1977 @@
+/* $NetBSD: uvm_swap.c,v 1.23 1998/12/26 06:25:59 marc Exp $ */
+
+/*
+ * Copyright (c) 1995, 1996, 1997 Matthew R. Green
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: NetBSD: vm_swap.c,v 1.52 1997/12/02 13:47:37 pk Exp
+ * from: Id: uvm_swap.c,v 1.1.2.42 1998/02/02 20:38:06 chuck Exp
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/namei.h>
+#include <sys/disklabel.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/extent.h>
+#include <sys/mount.h>
+#include <sys/pool.h>
+#include <sys/syscallargs.h>
+#include <sys/swap.h>
+
+#include <vm/vm.h>
+#include <vm/vm_conf.h>
+
+#include <uvm/uvm.h>
+
+#include <miscfs/specfs/specdev.h>
+
+/*
+ * uvm_swap.c: manage configuration and i/o to swap space.
+ */
+
+/*
+ * swap space is managed in the following way:
+ *
+ * each swap partition or file is described by a "swapdev" structure.
+ * each "swapdev" structure contains a "swapent" structure which contains
+ * information that is passed up to the user (via system calls).
+ *
+ * each swap partition is assigned a "priority" (int) which controls
+ * swap partition usage.
+ *
+ * the system maintains a global data structure describing all swap
+ * partitions/files. there is a sorted LIST of "swappri" structures
+ * which describe "swapdev"'s at that priority. this LIST is headed
+ * by the "swap_priority" global var. each "swappri" contains a
+ * CIRCLEQ of "swapdev" structures at that priority.
+ *
+ * the system maintains a fixed pool of "swapbuf" structures for use
+ * at swap i/o time. a swapbuf includes a "buf" structure and an
+ * "aiodesc" [we want to avoid malloc()'ing anything at swapout time
+ * since memory may be low].
+ *
+ * locking:
+ * - swap_syscall_lock (sleep lock): this lock serializes the swapctl
+ * system call and prevents the swap priority list from changing
+ * while we are in the middle of a system call (e.g. SWAP_STATS).
+ * - swap_data_lock (simple_lock): this lock protects all swap data
+ * structures including the priority list, the swapdev structures,
+ * and the swapmap extent.
+ * - swap_buf_lock (simple_lock): this lock protects the free swapbuf
+ * pool.
+ *
+ * each swap device has the following info:
+ * - swap device in use (could be disabled, preventing future use)
+ * - swap enabled (allows new allocations on swap)
+ * - map info in /dev/drum
+ * - vnode pointer
+ * for swap files only:
+ * - block size
+ * - max byte count in buffer
+ * - buffer
+ * - credentials to use when doing i/o to file
+ *
+ * userland controls and configures swap with the swapctl(2) system call.
+ * sys_swapctl() performs the following operations (usage sketch below):
+ * [1] SWAP_NSWAP: returns the number of swap devices currently configured
+ * [2] SWAP_STATS: given a pointer to an array of swapent structures
+ * (passed in via "arg") of a size passed in via "misc" ... we load
+ * the current swap config into the array.
+ * [3] SWAP_ON: given a pathname in arg (could be device or file) and a
+ * priority in "misc", start swapping on it.
+ * [4] SWAP_OFF: as SWAP_ON, but stops swapping to a device
+ * [5] SWAP_CTL: changes the priority of a swap device (new priority in
+ * "misc")
+ */
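+
+/*
+ * e.g. the SWAP_NSWAP/SWAP_STATS pair described above is typically
+ * driven from userland like this (a sketch; error handling omitted):
+ *
+ *	struct swapent *sep;
+ *	int nswap;
+ *
+ *	nswap = swapctl(SWAP_NSWAP, NULL, 0);
+ *	sep = malloc(nswap * sizeof(*sep));
+ *	swapctl(SWAP_STATS, sep, nswap);
+ */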
+
+/*
+ * SWAP_TO_FILES: allows swapping to plain files.
+ */
+
+#define SWAP_TO_FILES
+
+/*
+ * swapdev: describes a single swap partition/file
+ *
+ * note the following should be true:
+ * swd_inuse <= swd_nblks [number of blocks in use is <= total blocks]
+ * swd_nblks <= swd_mapsize [because mapsize includes miniroot+disklabel]
+ */
+struct swapdev {
+ struct oswapent swd_ose;
+#define swd_dev swd_ose.ose_dev /* device id */
+#define swd_flags swd_ose.ose_flags /* flags:inuse/enable/fake */
+#define swd_priority swd_ose.ose_priority /* our priority */
+ /* also: swd_ose.ose_nblks, swd_ose.ose_inuse */
+ char *swd_path; /* saved pathname of device */
+ int swd_pathlen; /* length of pathname */
+ int swd_npages; /* #pages we can use */
+ int swd_npginuse; /* #pages in use */
+ int swd_drumoffset; /* page0 offset in drum */
+ int swd_drumsize; /* #pages in drum */
+ struct extent *swd_ex; /* extent for this swapdev */
+ struct vnode *swd_vp; /* backing vnode */
+ CIRCLEQ_ENTRY(swapdev) swd_next; /* priority circleq */
+
+#ifdef SWAP_TO_FILES
+ int swd_bsize; /* blocksize (bytes) */
+ int swd_maxactive; /* max active i/o reqs */
+ struct buf swd_tab; /* buffer list */
+ struct ucred *swd_cred; /* cred for file access */
+#endif
+};
+
+/*
+ * swap device priority entry; the list is kept sorted on `spi_priority'.
+ */
+struct swappri {
+ int spi_priority; /* priority */
+ CIRCLEQ_HEAD(spi_swapdev, swapdev) spi_swapdev;
+ /* circleq of swapdevs at this priority */
+ LIST_ENTRY(swappri) spi_swappri; /* global list of pri's */
+};
+
+/*
+ * swapbuf, swapbuffer plus async i/o info
+ */
+struct swapbuf {
+ struct buf sw_buf; /* a buffer structure */
+ struct uvm_aiodesc sw_aio; /* aiodesc structure, used if ASYNC */
+ SIMPLEQ_ENTRY(swapbuf) sw_sq; /* free list pointer */
+};
+
+/*
+ * The following two structures are used to keep track of data transfers
+ * on swap devices associated with regular files.
+ * NOTE: this code is more or less a copy of vnd.c; we use the same
+ * structure names here to ease porting.
+ */
+struct vndxfer {
+ struct buf *vx_bp; /* Pointer to parent buffer */
+ struct swapdev *vx_sdp;
+ int vx_error;
+ int vx_pending; /* # of pending aux buffers */
+ int vx_flags;
+#define VX_BUSY 1
+#define VX_DEAD 2
+};
+
+struct vndbuf {
+ struct buf vb_buf;
+ struct vndxfer *vb_xfer;
+};
+
+
+/*
+ * We keep a pool of vndbuf's and vndxfer structures.
+ */
+struct pool *vndxfer_pool;
+struct pool *vndbuf_pool;
+
+#define getvndxfer(vnx) do { \
+ int s = splbio(); \
+ vnx = (struct vndxfer *) \
+ pool_get(vndxfer_pool, PR_MALLOCOK|PR_WAITOK); \
+ splx(s); \
+} while (0)
+
+#define putvndxfer(vnx) { \
+ pool_put(vndxfer_pool, (void *)(vnx)); \
+}
+
+#define getvndbuf(vbp) do { \
+ int s = splbio(); \
+ vbp = (struct vndbuf *) \
+ pool_get(vndbuf_pool, PR_MALLOCOK|PR_WAITOK); \
+ splx(s); \
+} while (0)
+
+#define putvndbuf(vbp) { \
+ pool_put(vndbuf_pool, (void *)(vbp)); \
+}
+
+
+/*
+ * local variables
+ */
+static struct extent *swapmap; /* controls the mapping of /dev/drum */
+SIMPLEQ_HEAD(swapbufhead, swapbuf);
+struct pool *swapbuf_pool;
+
+/* list of all active swap devices [by priority] */
+LIST_HEAD(swap_priority, swappri);
+static struct swap_priority swap_priority;
+
+/* locks */
+lock_data_t swap_syscall_lock;
+static simple_lock_data_t swap_data_lock;
+
+/*
+ * prototypes
+ */
+#ifdef notyet
+static void swapdrum_add __P((struct swapdev *, int));
+#endif
+static struct swapdev *swapdrum_getsdp __P((int));
+
+#ifdef notyet /* swapctl */
+static struct swapdev *swaplist_find __P((struct vnode *, int));
+static void swaplist_insert __P((struct swapdev *,
+ struct swappri *, int));
+static void swaplist_trim __P((void));
+
+static int swap_on __P((struct proc *, struct swapdev *));
+#endif
+#ifdef SWAP_OFF_WORKS
+static int swap_off __P((struct proc *, struct swapdev *));
+#endif
+
+#ifdef SWAP_TO_FILES
+static void sw_reg_strategy __P((struct swapdev *, struct buf *, int));
+static void sw_reg_iodone __P((struct buf *));
+static void sw_reg_start __P((struct swapdev *));
+#endif
+
+static void uvm_swap_aiodone __P((struct uvm_aiodesc *));
+static void uvm_swap_bufdone __P((struct buf *));
+static int uvm_swap_io __P((struct vm_page **, int, int, int));
+
+/*
+ * uvm_swap_init: init the swap system data structures and locks
+ *
+ * => called at boot time from init_main.c after the filesystems
+ * are brought up (which happens after uvm_init())
+ */
+void
+uvm_swap_init()
+{
+ UVMHIST_FUNC("uvm_swap_init");
+
+ UVMHIST_CALLED(pdhist);
+ /*
+ * first, init the swap list, its counter, and its lock.
+ * then get a handle on the vnode for /dev/drum by using
+ * its dev_t number ("swapdev", from MD conf.c).
+ */
+
+ LIST_INIT(&swap_priority);
+ uvmexp.nswapdev = 0;
+ lockinit(&swap_syscall_lock, PVM, "swapsys", 0, 0);
+ simple_lock_init(&swap_data_lock);
+
+ if (bdevvp(swapdev, &swapdev_vp))
+ panic("uvm_swap_init: can't get vnode for swap device");
+
+ /*
+ * create swap block resource map to map /dev/drum. the range
+ * from 1 to INT_MAX allows 2 gigablocks of swap space. note
+ * that block 0 is reserved (used to indicate an allocation
+ * failure, or no allocation).
+ */
+ swapmap = extent_create("swapmap", 1, INT_MAX,
+ M_VMSWAP, 0, 0, EX_NOWAIT);
+ if (swapmap == 0)
+ panic("uvm_swap_init: extent_create failed");
+
+ /*
+ * allocate our private pool of "swapbuf" structures (includes
+ * a "buf" structure). ["nswbuf" comes from param.c and can
+ * be adjusted by MD code before we get here].
+ */
+
+ swapbuf_pool =
+ pool_create(sizeof(struct swapbuf), 0, 0, 0, "swp buf", 0,
+ NULL, NULL, 0);
+ if (swapbuf_pool == NULL)
+ panic("swapinit: pool_create failed");
+ /* XXX - set a maximum on swapbuf_pool? */
+
+ vndxfer_pool =
+ pool_create(sizeof(struct vndxfer), 0, 0, 0, "swp vnx", 0,
+ NULL, NULL, 0);
+ if (vndxfer_pool == NULL)
+ panic("swapinit: pool_create failed");
+
+ vndbuf_pool =
+ pool_create(sizeof(struct vndbuf), 0, 0, 0, "swp vnd", 0,
+ NULL, NULL, 0);
+ if (vndbuf_pool == NULL)
+ panic("swapinit: pool_create failed");
+ /*
+ * done!
+ */
+ UVMHIST_LOG(pdhist, "<- done", 0, 0, 0, 0);
+}
+
+/*
+ * swaplist functions: functions that operate on the list of swap
+ * devices on the system.
+ */
+
+/*
+ * swaplist_insert: insert swap device "sdp" into the global list
+ *
+ * => caller must hold both swap_syscall_lock and swap_data_lock
+ * => caller must provide a newly malloc'd swappri structure (we will
+ * FREE it if we don't need it... this is to prevent malloc from blocking
+ * here while adding swap)
+ */
+#ifdef notyet /* used by swapctl */
+static void
+swaplist_insert(sdp, newspp, priority)
+ struct swapdev *sdp;
+ struct swappri *newspp;
+ int priority;
+{
+ struct swappri *spp, *pspp;
+ UVMHIST_FUNC("swaplist_insert"); UVMHIST_CALLED(pdhist);
+
+ /*
+ * find entry at or after which to insert the new device.
+ */
+ for (pspp = NULL, spp = swap_priority.lh_first; spp != NULL;
+ spp = spp->spi_swappri.le_next) {
+ if (priority <= spp->spi_priority)
+ break;
+ pspp = spp;
+ }
+
+ /*
+ * new priority?
+ */
+ if (spp == NULL || spp->spi_priority != priority) {
+ spp = newspp; /* use newspp! */
+ UVMHIST_LOG(pdhist, "created new swappri = %d", priority, 0, 0, 0);
+
+ spp->spi_priority = priority;
+ CIRCLEQ_INIT(&spp->spi_swapdev);
+
+ if (pspp)
+ LIST_INSERT_AFTER(pspp, spp, spi_swappri);
+ else
+ LIST_INSERT_HEAD(&swap_priority, spp, spi_swappri);
+ } else {
+ /* we don't need a new priority structure, free it */
+ FREE(newspp, M_VMSWAP);
+ }
+
+ /*
+ * priority found (or created). now insert on the priority's
+ * circleq list and bump the total number of swapdevs.
+ */
+ sdp->swd_priority = priority;
+ CIRCLEQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next);
+ uvmexp.nswapdev++;
+
+ /*
+ * done!
+ */
+}
+#endif
+
+#ifdef notyet /* used by swapctl */
+/*
+ * swaplist_find: find and optionally remove a swap device from the
+ * global list.
+ *
+ * => caller must hold both swap_syscall_lock and swap_data_lock
+ * => we return the swapdev we found (and removed)
+ */
+static struct swapdev *
+swaplist_find(vp, remove)
+ struct vnode *vp;
+ boolean_t remove;
+{
+ struct swapdev *sdp;
+ struct swappri *spp;
+
+ /*
+ * search the lists for the requested vp
+ */
+ for (spp = swap_priority.lh_first; spp != NULL;
+ spp = spp->spi_swappri.le_next) {
+ for (sdp = spp->spi_swapdev.cqh_first;
+ sdp != (void *)&spp->spi_swapdev;
+ sdp = sdp->swd_next.cqe_next)
+ if (sdp->swd_vp == vp) {
+ if (remove) {
+ CIRCLEQ_REMOVE(&spp->spi_swapdev,
+ sdp, swd_next);
+ uvmexp.nswapdev--;
+ }
+ return(sdp);
+ }
+ }
+ return (NULL);
+}
+
+
+/*
+ * swaplist_trim: scan priority list for empty priority entries and kill
+ * them.
+ *
+ * => caller must hold both swap_syscall_lock and swap_data_lock
+ */
+static void
+swaplist_trim()
+{
+ struct swappri *spp, *nextspp;
+
+ for (spp = swap_priority.lh_first; spp != NULL; spp = nextspp) {
+ nextspp = spp->spi_swappri.le_next;
+ if (spp->spi_swapdev.cqh_first != (void *)&spp->spi_swapdev)
+ continue;
+ LIST_REMOVE(spp, spi_swappri);
+ free((caddr_t)spp, M_VMSWAP);
+ }
+}
+
+/*
+ * swapdrum_add: add a "swapdev"'s blocks into /dev/drum's area.
+ *
+ * => caller must hold swap_syscall_lock
+ * => swap_data_lock should be unlocked (we may sleep)
+ */
+static void
+swapdrum_add(sdp, npages)
+ struct swapdev *sdp;
+ int npages;
+{
+ u_long result;
+
+ if (extent_alloc(swapmap, npages, EX_NOALIGN, EX_NOBOUNDARY,
+ EX_WAITOK, &result))
+ panic("swapdrum_add");
+
+ sdp->swd_drumoffset = result;
+ sdp->swd_drumsize = npages;
+}
+#endif
+
+/*
+ * swapdrum_getsdp: given a page offset in /dev/drum, convert it back
+ * to the "swapdev" that maps that section of the drum.
+ *
+ * => each swapdev takes one big contig chunk of the drum
+ * => caller must hold swap_data_lock
+ */
+static struct swapdev *
+swapdrum_getsdp(pgno)
+ int pgno;
+{
+ struct swapdev *sdp;
+ struct swappri *spp;
+
+ for (spp = swap_priority.lh_first; spp != NULL;
+ spp = spp->spi_swappri.le_next)
+ for (sdp = spp->spi_swapdev.cqh_first;
+ sdp != (void *)&spp->spi_swapdev;
+ sdp = sdp->swd_next.cqe_next)
+ if (pgno >= sdp->swd_drumoffset &&
+ pgno < (sdp->swd_drumoffset + sdp->swd_drumsize)) {
+ return sdp;
+ }
+ return NULL;
+}
+
+
+/*XXX
+ *XXX
+ *XXX*/
+int
+sys_swapon(p, v, retval)
+ struct proc *p;
+ void *v;
+ register_t *retval;
+{
+ return EINVAL;
+}
+
+#ifdef notyet /* XXXXXXXXXXXXXXXX (it has other bugs besides the fact that I don't want to change syscalls.master) */
+/*
+ * sys_swapctl: main entry point for swapctl(2) system call
+ * [with two helper functions: swap_on and swap_off]
+ */
+int
+sys_swapctl(p, v, retval)
+ struct proc *p;
+ void *v;
+ register_t *retval;
+{
+ struct sys_swapctl_args /* {
+ syscallarg(int) cmd;
+ syscallarg(void *) arg;
+ syscallarg(int) misc;
+ } */ *uap = (struct sys_swapctl_args *)v;
+ struct vnode *vp;
+ struct nameidata nd;
+ struct swappri *spp;
+ struct swapdev *sdp;
+ struct swapent *sep;
+ char userpath[PATH_MAX + 1];
+ size_t len;
+ int count, error, misc;
+ int priority;
+ UVMHIST_FUNC("sys_swapctl"); UVMHIST_CALLED(pdhist);
+
+ misc = SCARG(uap, misc);
+
+ /*
+ * ensure serialized syscall access by grabbing the swap_syscall_lock
+ */
+ lockmgr(&swap_syscall_lock, LK_EXCLUSIVE, (void *)0, p);
+
+ /*
+ * we handle the non-priv NSWAP and STATS request first.
+ *
+ * SWAP_NSWAP: return number of config'd swap devices
+ * [can also be obtained with uvmexp sysctl]
+ */
+ if (SCARG(uap, cmd) == SWAP_NSWAP) {
+ UVMHIST_LOG(pdhist, "<- done SWAP_NSWAP=%d", uvmexp.nswapdev,
+ 0, 0, 0);
+ *retval = uvmexp.nswapdev;
+ error = 0;
+ goto out;
+ }
+
+ /*
+ * SWAP_STATS: get stats on current # of configured swap devs
+ *
+ * note that the swap_priority list can't change as long
+ * as we are holding the swap_syscall_lock. we don't want
+ * to grab the swap_data_lock because we may fault&sleep during
+ * copyout() and we don't want to be holding that lock then!
+ */
+ if (SCARG(uap, cmd) == SWAP_STATS
+#if defined(COMPAT_13)
+ || SCARG(uap, cmd) == SWAP_OSTATS
+#endif
+ ) {
+ sep = (struct swapent *)SCARG(uap, arg);
+ count = 0;
+
+ for (spp = swap_priority.lh_first; spp != NULL;
+ spp = spp->spi_swappri.le_next) {
+ for (sdp = spp->spi_swapdev.cqh_first;
+ sdp != (void *)&spp->spi_swapdev && misc-- > 0;
+ sdp = sdp->swd_next.cqe_next) {
+ /*
+ * backwards compatibility for system call.
+ * note that we use 'struct oswapent' as an
+ * overlay into both 'struct swapdev' and
+ * the userland 'struct swapent', as we
+ * want to retain backwards compatibility
+ * with NetBSD 1.3.
+ */
+ sdp->swd_ose.ose_inuse =
+ btodb(sdp->swd_npginuse << PAGE_SHIFT);
+ error = copyout((caddr_t)&sdp->swd_ose,
+ (caddr_t)sep, sizeof(struct oswapent));
+
+ /* now copy out the path if necessary */
+#if defined(COMPAT_13)
+ if (error == 0 && SCARG(uap, cmd) == SWAP_STATS)
+#else
+ if (error == 0)
+#endif
+ error = copyout((caddr_t)sdp->swd_path,
+ (caddr_t)&sep->se_path,
+ sdp->swd_pathlen);
+
+ if (error)
+ goto out;
+ count++;
+#if defined(COMPAT_13)
+ if (SCARG(uap, cmd) == SWAP_OSTATS)
+ ((struct oswapent *)sep)++;
+ else
+#endif
+ sep++;
+ }
+ }
+
+ UVMHIST_LOG(pdhist, "<- done SWAP_STATS", 0, 0, 0, 0);
+
+ *retval = count;
+ error = 0;
+ goto out;
+ }
+
+ /*
+ * all other requests require superuser privs. verify.
+ */
+ if ((error = suser(p->p_ucred, &p->p_acflag)))
+ goto out;
+
+ /*
+ * at this point we expect a path name in arg. we will
+ * use namei() to gain a vnode reference (vref), and lock
+ * the vnode (VOP_LOCK).
+ *
+ * XXX: a NULL arg means use the root vnode pointer (e.g. for
+ * miniroot)
+ */
+ if (SCARG(uap, arg) == NULL) {
+ vp = rootvp; /* miniroot */
+ if (vget(vp, LK_EXCLUSIVE)) {
+ error = EBUSY;
+ goto out;
+ }
+ if (SCARG(uap, cmd) == SWAP_ON &&
+ copystr("miniroot", userpath, sizeof userpath, &len))
+ panic("swapctl: miniroot copy failed");
+ } else {
+ int space;
+ char *where;
+
+ if (SCARG(uap, cmd) == SWAP_ON) {
+ if ((error = copyinstr(SCARG(uap, arg), userpath,
+ sizeof userpath, &len)))
+ goto out;
+ space = UIO_SYSSPACE;
+ where = userpath;
+ } else {
+ space = UIO_USERSPACE;
+ where = (char *)SCARG(uap, arg);
+ }
+ NDINIT(&nd, LOOKUP, FOLLOW|LOCKLEAF, space, where, p);
+ if ((error = namei(&nd)))
+ goto out;
+ vp = nd.ni_vp;
+ }
+ /* note: "vp" is referenced and locked */
+
+ error = 0; /* assume no error */
+ switch(SCARG(uap, cmd)) {
+ case SWAP_CTL:
+ /*
+ * get new priority, remove old entry (if any) and then
+ * reinsert it in the correct place. finally, prune out
+ * any empty priority structures.
+ */
+ priority = SCARG(uap, misc);
+ spp = (struct swappri *)
+ malloc(sizeof *spp, M_VMSWAP, M_WAITOK);
+ simple_lock(&swap_data_lock);
+ if ((sdp = swaplist_find(vp, 1)) == NULL) {
+ error = ENOENT;
+ } else {
+ swaplist_insert(sdp, spp, priority);
+ swaplist_trim();
+ }
+ simple_unlock(&swap_data_lock);
+ if (error)
+ free(spp, M_VMSWAP);
+ break;
+
+ case SWAP_ON:
+ /*
+ * check for duplicates. if none found, then insert a
+ * dummy entry on the list to prevent someone else from
+ * trying to enable this device while we are working on
+ * it.
+ */
+ priority = SCARG(uap, misc);
+ simple_lock(&swap_data_lock);
+ if ((sdp = swaplist_find(vp, 0)) != NULL) {
+ error = EBUSY;
+ simple_unlock(&swap_data_lock);
+ break;
+ }
+ sdp = (struct swapdev *)
+ malloc(sizeof *sdp, M_VMSWAP, M_WAITOK);
+ spp = (struct swappri *)
+ malloc(sizeof *spp, M_VMSWAP, M_WAITOK);
+ bzero(sdp, sizeof(*sdp));
+ sdp->swd_flags = SWF_FAKE; /* placeholder only */
+ sdp->swd_vp = vp;
+ sdp->swd_dev = (vp->v_type == VBLK) ? vp->v_rdev : NODEV;
+#ifdef SWAP_TO_FILES
+ /*
+ * XXX Is NFS elaboration necessary?
+ */
+ if (vp->v_type == VREG)
+ sdp->swd_cred = crdup(p->p_ucred);
+#endif
+ swaplist_insert(sdp, spp, priority);
+ simple_unlock(&swap_data_lock);
+
+ sdp->swd_pathlen = len;
+ sdp->swd_path = malloc(sdp->swd_pathlen, M_VMSWAP, M_WAITOK);
+ if (copystr(userpath, sdp->swd_path, sdp->swd_pathlen, 0) != 0)
+ panic("swapctl: copystr");
+ /*
+ * we've now got a FAKE placeholder in the swap list.
+ * now attempt to enable swap on it. if we fail, undo
+ * what we've done and kill the fake entry we just inserted.
+ * if swap_on is a success, it will clear the SWF_FAKE flag
+ */
+ if ((error = swap_on(p, sdp)) != 0) {
+ simple_lock(&swap_data_lock);
+ (void) swaplist_find(vp, 1); /* kill fake entry */
+ swaplist_trim();
+ simple_unlock(&swap_data_lock);
+#ifdef SWAP_TO_FILES
+ if (vp->v_type == VREG)
+ crfree(sdp->swd_cred);
+#endif
+ free(sdp->swd_path, M_VMSWAP);
+ free((caddr_t)sdp, M_VMSWAP);
+ break;
+ }
+
+ /*
+ * got it! now add a second reference to vp so that
+ * we keep a reference to the vnode after we return.
+ */
+ vref(vp);
+ break;
+
+ case SWAP_OFF:
+ UVMHIST_LOG(pdhist, "someone is using SWAP_OFF...??", 0,0,0,0);
+#ifdef SWAP_OFF_WORKS
+ /*
+ * find the entry of interest and ensure it is enabled.
+ */
+ simple_lock(&swap_data_lock);
+ if ((sdp = swaplist_find(vp, 0)) == NULL) {
+ simple_unlock(&swap_data_lock);
+ error = ENXIO;
+ break;
+ }
+ /*
+ * If a device isn't in use or enabled, we
+ * can't stop swapping from it (again).
+ */
+ if ((sdp->swd_flags & (SWF_INUSE|SWF_ENABLE)) == 0) {
+ simple_unlock(&swap_data_lock);
+ error = EBUSY;
+ break;
+ }
+ /* XXXCDC: should we call with list locked or unlocked? */
+ if ((error = swap_off(p, sdp)) != 0)
+ break;
+ /* XXXCDC: might need relock here */
+
+ /*
+ * now we can kill the entry.
+ */
+ if ((sdp = swaplist_find(vp, 1)) == NULL) {
+ error = ENXIO;
+ break;
+ }
+ simple_unlock(&swap_data_lock);
+ free((caddr_t)sdp, M_VMSWAP);
+#else
+ error = EINVAL;
+#endif
+ break;
+
+ default:
+ UVMHIST_LOG(pdhist, "unhandled command: %#x",
+ SCARG(uap, cmd), 0, 0, 0);
+ error = EINVAL;
+ }
+
+ /*
+ * done! use vput to drop our reference and unlock
+ */
+ vput(vp);
+out:
+ lockmgr(&swap_syscall_lock, LK_RELEASE, (void *)0, p);
+
+ UVMHIST_LOG(pdhist, "<- done! error=%d", error, 0, 0, 0);
+ return (error);
+}
+#endif
+
+
+/*
+ * swap_on: attempt to enable a swapdev for swapping. note that the
+ * swapdev is already on the global list, but disabled (marked
+ * SWF_FAKE).
+ *
+ * => we avoid the start of the disk (to protect disk labels)
+ * => we also avoid the miniroot, if we are swapping to root.
+ * => caller should leave swap_data_lock unlocked, we may lock it
+ * if needed.
+ */
+#ifdef notyet /* used by swapctl */
+static int
+swap_on(p, sdp)
+ struct proc *p;
+ struct swapdev *sdp;
+{
+ static int count = 0; /* static */
+ struct vnode *vp;
+ int error, npages, nblocks, size;
+ long addr;
+#ifdef SWAP_TO_FILES
+ struct vattr va;
+#endif
+#ifdef NFS
+ extern int (**nfsv2_vnodeop_p) __P((void *));
+#endif /* NFS */
+ dev_t dev;
+ char *name;
+ UVMHIST_FUNC("swap_on"); UVMHIST_CALLED(pdhist);
+
+ /*
+ * we want to enable swapping on sdp. the swd_vp contains
+ * the vnode we want (locked and ref'd), and the swd_dev
+ * contains the dev_t of the file, if it is a block device.
+ */
+
+ vp = sdp->swd_vp;
+ dev = sdp->swd_dev;
+
+ /*
+ * open the swap file (mostly useful for block device files to
+ * let device driver know what is up).
+ *
+ * we skip the open/close for root on swap because the root
+ * has already been opened when root was mounted (mountroot).
+ */
+ if (vp != rootvp) {
+ if ((error = VOP_OPEN(vp, FREAD|FWRITE, p->p_ucred, p)))
+ return (error);
+ }
+
+ /* XXX this only works for block devices */
+ UVMHIST_LOG(pdhist, " dev=%d, major(dev)=%d", dev, major(dev), 0,0);
+
+ /*
+ * we now need to determine the size of the swap area. for
+ * block specials we can call the d_psize function.
+ * for normal files, we must stat [get attrs].
+ *
+ * we put the result in nblocks.
+ * for normal files, we also want the filesystem block size
+ * (which we get with statfs).
+ */
+ switch (vp->v_type) {
+ case VBLK:
+ if (bdevsw[major(dev)].d_psize == 0 ||
+ (nblocks = (*bdevsw[major(dev)].d_psize)(dev)) == -1) {
+ error = ENXIO;
+ goto bad;
+ }
+ break;
+
+#ifdef SWAP_TO_FILES
+ case VREG:
+ if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)))
+ goto bad;
+ nblocks = (int)btodb(va.va_size);
+ if ((error =
+ VFS_STATFS(vp->v_mount, &vp->v_mount->mnt_stat, p)) != 0)
+ goto bad;
+
+ sdp->swd_bsize = vp->v_mount->mnt_stat.f_iosize;
+ /*
+ * limit the max # of outstanding I/O requests we issue
+ * at any one time. take it easy on NFS servers.
+ */
+#ifdef NFS
+ if (vp->v_op == nfsv2_vnodeop_p)
+ sdp->swd_maxactive = 2; /* XXX */
+ else
+#endif /* NFS */
+ sdp->swd_maxactive = 8; /* XXX */
+ break;
+#endif
+
+ default:
+ error = ENXIO;
+ goto bad;
+ }
+
+ /*
+ * save nblocks in a safe place and convert to pages.
+ */
+
+ sdp->swd_ose.ose_nblks = nblocks;
+ npages = dbtob((u_int64_t)nblocks) >> PAGE_SHIFT;
+
+ /*
+ * for block special files, we want to make sure that we leave
+ * the disklabel and bootblocks alone, so we arrange to skip
+ * over them (randomly choosing to skip PAGE_SIZE bytes).
+ * note that because of this the "size" can be less than the
+ * actual number of blocks on the device.
+ */
+ if (vp->v_type == VBLK) {
+ /* we use pages 1 to (size - 1) [inclusive] */
+ size = npages - 1;
+ addr = 1;
+ } else {
+ /* we use pages 0 to (size - 1) [inclusive] */
+ size = npages;
+ addr = 0;
+ }
+
+ /*
+ * make sure we have enough blocks for a reasonable sized swap
+ * area. we want at least one page.
+ */
+
+ if (size < 1) {
+ UVMHIST_LOG(pdhist, " size <= 1!!", 0, 0, 0, 0);
+ error = EINVAL;
+ goto bad;
+ }
+
+ UVMHIST_LOG(pdhist, " dev=%x: size=%d addr=%ld\n", dev, size, addr, 0);
+
+ /*
+ * now we need to allocate an extent to manage this swap device
+ */
+ name = malloc(12, M_VMSWAP, M_WAITOK);
+ sprintf(name, "swap0x%04x", count++);
+
+ /* note that extent_create's 3rd arg is inclusive, thus "- 1" */
+ sdp->swd_ex = extent_create(name, 0, npages - 1, M_VMSWAP,
+ 0, 0, EX_WAITOK);
+ /* allocate the `saved' region from the extent so it won't be used */
+ if (addr) {
+ if (extent_alloc_region(sdp->swd_ex, 0, addr, EX_WAITOK))
+ panic("disklabel region");
+ sdp->swd_npginuse += addr;
+ uvmexp.swpginuse += addr;
+ }
+
+
+ /*
+ * if the vnode we are swapping to is the root vnode
+ * (i.e. we are swapping to the miniroot) then we want
+ * to make sure we don't overwrite it. do a statfs to
+ * find its size and skip over it.
+ */
+ if (vp == rootvp) {
+ struct mount *mp;
+ struct statfs *sp;
+ int rootblocks, rootpages;
+
+ mp = rootvnode->v_mount;
+ sp = &mp->mnt_stat;
+ rootblocks = sp->f_blocks * btodb(sp->f_bsize);
+ rootpages = round_page(dbtob(rootblocks)) >> PAGE_SHIFT;
+ if (rootpages > npages)
+ panic("swap_on: miniroot larger than swap?");
+
+ if (extent_alloc_region(sdp->swd_ex, addr,
+ rootpages, EX_WAITOK))
+ panic("swap_on: unable to preserve miniroot");
+
+ sdp->swd_npginuse += (rootpages - addr);
+ uvmexp.swpginuse += (rootpages - addr);
+
+ printf("Preserved %d pages of miniroot ", rootpages);
+ printf("leaving %d pages of swap\n", size - rootpages);
+ }
+
+ /*
+ * now add the new swapdev to the drum and enable.
+ */
+ simple_lock(&swap_data_lock);
+ swapdrum_add(sdp, npages);
+ sdp->swd_npages = npages;
+ sdp->swd_flags &= ~SWF_FAKE; /* going live */
+ sdp->swd_flags |= (SWF_INUSE|SWF_ENABLE);
+ simple_unlock(&swap_data_lock);
+ uvmexp.swpages += npages;
+
+ /*
+ * add anon's to reflect the swap space we added
+ */
+ uvm_anon_add(size);
+
+#if 0
+ /*
+ * At this point we could arrange to reserve memory for the
+ * swap buffer pools.
+ *
+ * I don't think this is necessary, since swapping starts well
+ * ahead of serious memory deprivation and the memory resource
+ * pools hold on to actively used memory. This should ensure
+ * we always have some resources to continue operation.
+ */
+
+ int s = splbio();
+ int n = 8 * sdp->swd_maxactive;
+
+ (void)pool_prime(swapbuf_pool, n, 0);
+
+ if (vp->v_type == VREG) {
+ /* Allocate additional vnx and vnd buffers */
+ /*
+ * Allocation Policy:
+ * (8 * swd_maxactive) vnx headers per swap dev
+ * (16 * swd_maxactive) vnd buffers per swap dev
+ */
+
+ n = 8 * sdp->swd_maxactive;
+ (void)pool_prime(vndxfer_pool, n, 0);
+
+ n = 16 * sdp->swd_maxactive;
+ (void)pool_prime(vndbuf_pool, n, 0);
+ }
+ splx(s);
+#endif
+
+ return (0);
+
+bad:
+ /*
+ * failure: close device if necessary and return error.
+ */
+ if (vp != rootvp)
+ (void)VOP_CLOSE(vp, FREAD|FWRITE, p->p_ucred, p);
+ return (error);
+}
+#endif
+
+#ifdef SWAP_OFF_WORKS
+/*
+ * swap_off: stop swapping on swapdev
+ *
+ * XXXCDC: what conditions go here?
+ */
+static int
+swap_off(p, sdp)
+ struct proc *p;
+ struct swapdev *sdp;
+{
+ char *name;
+ UVMHIST_FUNC("swap_off"); UVMHIST_CALLED(pdhist);
+
+ /* turn off the enable flag */
+ sdp->swd_flags &= ~SWF_ENABLE;
+
+ UVMHIST_LOG(pdhist, " dev=%x", sdp->swd_dev, 0, 0, 0);
+
+ /*
+ * XXX write me
+ *
+ * the idea is to find out which processes are using this swap
+ * device, and page them all in.
+ *
+ * eventually, we should try to move them out to other swap areas
+ * if available.
+ *
+ * The alternative is to create a redirection map for this swap
+ * device. This should work by moving all the pages of data from
+ * the ex-swap device to another one, and making an entry in the
+ * redirection map for it. locking is going to be important for
+ * this!
+ *
+ * XXXCDC: also need to shrink anon pool
+ */
+
+ /* until the above code is written, we must ENODEV */
+ return ENODEV;
+
+ extent_free(swapmap, sdp->swd_drumoffset, sdp->swd_drumsize, EX_WAITOK);
+ name = sdp->swd_ex->ex_name;
+ extent_destroy(sdp->swd_ex);
+ free(name, M_VMSWAP);
+ free((caddr_t)sdp->swd_ex, M_VMSWAP);
+ if (sdp->swd_vp != rootvp)
+ (void) VOP_CLOSE(sdp->swd_vp, FREAD|FWRITE, p->p_ucred, p);
+ if (sdp->swd_vp)
+ vrele(sdp->swd_vp);
+ free((caddr_t)sdp, M_VMSWAP);
+ return (0);
+}
+#endif
+
+/*
+ * /dev/drum interface and i/o functions
+ */
+
+/*
+ * swread: the read function for the drum (just a call to physio)
+ */
+/*ARGSUSED*/
+int
+swread(dev, uio, ioflag)
+ dev_t dev;
+ struct uio *uio;
+ int ioflag;
+{
+ UVMHIST_FUNC("swread"); UVMHIST_CALLED(pdhist);
+
+ UVMHIST_LOG(pdhist, " dev=%x offset=%qx", dev, uio->uio_offset, 0, 0);
+ return (physio(swstrategy, NULL, dev, B_READ, minphys, uio));
+}
+
+/*
+ * swwrite: the write function for the drum (just a call to physio)
+ */
+/*ARGSUSED*/
+int
+swwrite(dev, uio, ioflag)
+ dev_t dev;
+ struct uio *uio;
+ int ioflag;
+{
+ UVMHIST_FUNC("swwrite"); UVMHIST_CALLED(pdhist);
+
+ UVMHIST_LOG(pdhist, " dev=%x offset=%qx", dev, uio->uio_offset, 0, 0);
+ return (physio(swstrategy, NULL, dev, B_WRITE, minphys, uio));
+}
+
+/*
+ * swstrategy: perform I/O on the drum
+ *
+ * => we must map the i/o request from the drum to the correct swapdev.
+ */
+void
+swstrategy(bp)
+ struct buf *bp;
+{
+ struct swapdev *sdp;
+ struct vnode *vp;
+ int pageno;
+ int bn;
+ UVMHIST_FUNC("swstrategy"); UVMHIST_CALLED(pdhist);
+
+ /*
+ * convert block number to swapdev. note that swapdev can't
+ * be yanked out from under us because we are holding resources
+ * in it (i.e. the blocks we are doing I/O on).
+ */
+ pageno = dbtob(bp->b_blkno) >> PAGE_SHIFT;
+ simple_lock(&swap_data_lock);
+ sdp = swapdrum_getsdp(pageno);
+ simple_unlock(&swap_data_lock);
+ if (sdp == NULL) {
+ bp->b_error = EINVAL;
+ bp->b_flags |= B_ERROR;
+ biodone(bp);
+ UVMHIST_LOG(pdhist, " failed to get swap device", 0, 0, 0, 0);
+ return;
+ }
+
+ /*
+ * convert drum page number to block number on this swapdev.
+ */
+
+ pageno = pageno - sdp->swd_drumoffset; /* page # on swapdev */
+ bn = btodb(pageno << PAGE_SHIFT); /* convert to diskblock */
+
+ UVMHIST_LOG(pdhist, " %s: mapoff=%x bn=%x bcount=%ld\n",
+ ((bp->b_flags & B_READ) == 0) ? "write" : "read",
+ sdp->swd_drumoffset, bn, bp->b_bcount);
+
+
+ /*
+ * for block devices we finish up here.
+ * for regular files we have to do more work which we delegate
+ * to sw_reg_strategy().
+ */
+
+ switch (sdp->swd_vp->v_type) {
+ default:
+ panic("swstrategy: vnode type 0x%x", sdp->swd_vp->v_type);
+ case VBLK:
+
+ /*
+ * must convert "bp" from an I/O on /dev/drum to an I/O
+ * on the swapdev (sdp).
+ */
+ bp->b_blkno = bn; /* swapdev block number */
+ vp = sdp->swd_vp; /* swapdev vnode pointer */
+ bp->b_dev = sdp->swd_dev; /* swapdev dev_t */
+ VHOLD(vp); /* "hold" swapdev vp for i/o */
+
+ /*
+ * if we are doing a write, we have to move the i/o accounting
+ * from drum's v_numoutput counter to the swapdev's.
+ */
+ if ((bp->b_flags & B_READ) == 0) {
+ int s = splbio();
+ vwakeup(bp); /* kills one 'v_numoutput' on drum */
+ vp->v_numoutput++; /* put it on swapdev */
+ splx(s);
+ }
+
+ /*
+ * disassociate buffer from /dev/drum vnode
+ * [could be null if buf was from physio]
+ */
+ if (bp->b_vp != NULLVP)
+ brelvp(bp);
+
+ /*
+ * finally plug in swapdev vnode and start I/O
+ */
+ bp->b_vp = vp;
+ VOP_STRATEGY(bp);
+ return;
+#ifdef SWAP_TO_FILES
+ case VREG:
+ /*
+ * delegate to the sw_reg_strategy function.
+ */
+ sw_reg_strategy(sdp, bp, bn);
+ return;
+#endif
+ }
+ /* NOTREACHED */
+}
+
+#ifdef SWAP_TO_FILES
+/*
+ * sw_reg_strategy: handle swap i/o to regular files
+ */
+static void
+sw_reg_strategy(sdp, bp, bn)
+ struct swapdev *sdp;
+ struct buf *bp;
+ int bn;
+{
+ struct vnode *vp;
+ struct vndxfer *vnx;
+ daddr_t nbn, byteoff;
+ caddr_t addr;
+ int s, off, nra, error, sz, resid;
+ UVMHIST_FUNC("sw_reg_strategy"); UVMHIST_CALLED(pdhist);
+
+ /*
+ * allocate a vndxfer head for this transfer and point it to
+ * our buffer.
+ */
+ getvndxfer(vnx);
+ vnx->vx_flags = VX_BUSY;
+ vnx->vx_error = 0;
+ vnx->vx_pending = 0;
+ vnx->vx_bp = bp;
+ vnx->vx_sdp = sdp;
+
+ /*
+ * setup for main loop where we read filesystem blocks into
+ * our buffer.
+ */
+ error = 0;
+ bp->b_resid = bp->b_bcount; /* nothing transferred yet! */
+ addr = bp->b_data; /* current position in buffer */
+ byteoff = dbtob(bn);
+
+ for (resid = bp->b_resid; resid; resid -= sz) {
+ struct vndbuf *nbp;
+
+ /*
+ * translate byteoffset into block number. return values:
+ * vp = vnode of underlying device
+ * nbn = new block number (on underlying vnode dev)
+ * nra = num blocks we can read-ahead (excludes requested
+ * block)
+ */
+ nra = 0;
+ error = VOP_BMAP(sdp->swd_vp, byteoff / sdp->swd_bsize,
+ &vp, &nbn, &nra);
+
+ if (error == 0 && (long)nbn == -1) {
+ /*
+ * this used to just set error, but that doesn't
+ * do the right thing. Instead, it causes random
+ * memory errors. The panic() should remain until
+ * this condition doesn't destabilize the system.
+ */
+#if 1
+ panic("sw_reg_strategy: swap to sparse file");
+#else
+ error = EIO; /* failure */
+#endif
+ }
+
+ /*
+ * punt if there was an error or a hole in the file.
+ * we must wait for any i/o ops we have already started
+ * to finish before returning.
+ *
+ * XXX we could deal with holes here but it would be
+ * a hassle (in the write case).
+ */
+ if (error) {
+ s = splbio();
+ vnx->vx_error = error; /* pass error up */
+ goto out;
+ }
+
+ /*
+ * compute the size ("sz") of this transfer (in bytes).
+ * XXXCDC: ignores read-ahead for non-zero offset
+ */
+ if ((off = (byteoff % sdp->swd_bsize)) != 0)
+ sz = sdp->swd_bsize - off;
+ else
+ sz = (1 + nra) * sdp->swd_bsize;
+
+ if (resid < sz)
+ sz = resid;
+
+ UVMHIST_LOG(pdhist, "sw_reg_strategy: vp %p/%p offset 0x%x/0x%x",
+ sdp->swd_vp, vp, byteoff, nbn);
+
+ /*
+ * now get a buf structure. note that the vb_buf is
+ * at the front of the nbp structure so that you can
+ * cast pointers between the two structures easily.
+ */
+ getvndbuf(nbp);
+ nbp->vb_buf.b_flags = bp->b_flags | B_CALL;
+ nbp->vb_buf.b_bcount = sz;
+#if 0
+ nbp->vb_buf.b_bufsize = bp->b_bufsize; /* XXXCDC: really? */
+#endif
+ nbp->vb_buf.b_bufsize = sz;
+ nbp->vb_buf.b_error = 0;
+ nbp->vb_buf.b_data = addr;
+ nbp->vb_buf.b_blkno = nbn + btodb(off);
+ nbp->vb_buf.b_proc = bp->b_proc;
+ nbp->vb_buf.b_iodone = sw_reg_iodone;
+ nbp->vb_buf.b_vp = NULLVP;
+ nbp->vb_buf.b_vnbufs.le_next = NOLIST;
+ nbp->vb_buf.b_rcred = sdp->swd_cred;
+ nbp->vb_buf.b_wcred = sdp->swd_cred;
+
+ /*
+ * set b_dirtyoff/end and b_validoff/end. this is
+ * required by the NFS client code (otherwise it will
+ * just discard our I/O request).
+ */
+ if (bp->b_dirtyend == 0) {
+ nbp->vb_buf.b_dirtyoff = 0;
+ nbp->vb_buf.b_dirtyend = sz;
+ } else {
+ nbp->vb_buf.b_dirtyoff =
+ max(0, bp->b_dirtyoff - (bp->b_bcount-resid));
+ nbp->vb_buf.b_dirtyend =
+ min(sz,
+ max(0, bp->b_dirtyend - (bp->b_bcount-resid)));
+ }
+ if (bp->b_validend == 0) {
+ nbp->vb_buf.b_validoff = 0;
+ nbp->vb_buf.b_validend = sz;
+ } else {
+ nbp->vb_buf.b_validoff =
+ max(0, bp->b_validoff - (bp->b_bcount-resid));
+ nbp->vb_buf.b_validend =
+ min(sz,
+ max(0, bp->b_validend - (bp->b_bcount-resid)));
+ }
+
+ nbp->vb_xfer = vnx; /* patch it back in to vnx */
+
+ /*
+ * Just sort by block number
+ */
+ nbp->vb_buf.b_cylinder = nbp->vb_buf.b_blkno;
+ s = splbio();
+ if (vnx->vx_error != 0) {
+ putvndbuf(nbp);
+ goto out;
+ }
+ vnx->vx_pending++;
+
+ /* assoc new buffer with underlying vnode */
+ bgetvp(vp, &nbp->vb_buf);
+
+ /* sort it in and start I/O if we are not over our limit */
+ disksort(&sdp->swd_tab, &nbp->vb_buf);
+ sw_reg_start(sdp);
+ splx(s);
+
+ /*
+ * advance to the next I/O
+ */
+ byteoff += sz;
+ addr += sz;
+ }
+
+ s = splbio();
+
+out: /* Arrive here at splbio */
+ vnx->vx_flags &= ~VX_BUSY;
+ if (vnx->vx_pending == 0) {
+ if (vnx->vx_error != 0) {
+ bp->b_error = vnx->vx_error;
+ bp->b_flags |= B_ERROR;
+ }
+ putvndxfer(vnx);
+ biodone(bp);
+ }
+ splx(s);
+}
+
+/*
+ * sw_reg_start: start an I/O request on the requested swapdev
+ *
+ * => reqs are sorted by disksort (above)
+ */
+static void
+sw_reg_start(sdp)
+ struct swapdev *sdp;
+{
+ struct buf *bp;
+ UVMHIST_FUNC("sw_reg_start"); UVMHIST_CALLED(pdhist);
+
+ /* recursion control */
+ if ((sdp->swd_flags & SWF_BUSY) != 0)
+ return;
+
+ sdp->swd_flags |= SWF_BUSY;
+
+ while (sdp->swd_tab.b_active < sdp->swd_maxactive) {
+ bp = sdp->swd_tab.b_actf;
+ if (bp == NULL)
+ break;
+ sdp->swd_tab.b_actf = bp->b_actf;
+ sdp->swd_tab.b_active++;
+
+ UVMHIST_LOG(pdhist,
+ "sw_reg_start: bp %p vp %p blkno %p cnt %lx",
+ bp, bp->b_vp, bp->b_blkno, bp->b_bcount);
+ if ((bp->b_flags & B_READ) == 0)
+ bp->b_vp->v_numoutput++;
+ VOP_STRATEGY(bp);
+ }
+ sdp->swd_flags &= ~SWF_BUSY;
+}
+
+/*
+ * sw_reg_iodone: one of our i/o's has completed and needs post-i/o cleanup
+ *
+ * => note that we can recover the vndbuf struct by casting the buf ptr
+ */
+static void
+sw_reg_iodone(bp)
+ struct buf *bp;
+{
+ struct vndbuf *vbp = (struct vndbuf *) bp;
+ struct vndxfer *vnx = vbp->vb_xfer;
+ struct buf *pbp = vnx->vx_bp; /* parent buffer */
+ struct swapdev *sdp = vnx->vx_sdp;
+ int s, resid;
+ UVMHIST_FUNC("sw_reg_iodone"); UVMHIST_CALLED(pdhist);
+
+ UVMHIST_LOG(pdhist, " vbp=%p vp=%p blkno=%x addr=%p",
+ vbp, vbp->vb_buf.b_vp, vbp->vb_buf.b_blkno, vbp->vb_buf.b_data);
+ UVMHIST_LOG(pdhist, " cnt=%lx resid=%lx",
+ vbp->vb_buf.b_bcount, vbp->vb_buf.b_resid, 0, 0);
+
+ /*
+ * protect vbp at splbio and update.
+ */
+
+ s = splbio();
+ resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid;
+ pbp->b_resid -= resid;
+ vnx->vx_pending--;
+
+ if (vbp->vb_buf.b_error) {
+ UVMHIST_LOG(pdhist, " got error=%d !",
+ vbp->vb_buf.b_error, 0, 0, 0);
+
+ /* pass error upward */
+ vnx->vx_error = vbp->vb_buf.b_error;
+ }
+
+ /*
+ * drop "hold" reference to vnode (if one)
+ * XXXCDC: always set to NULLVP, this is useless, right?
+ */
+ if (vbp->vb_buf.b_vp != NULLVP)
+ brelvp(&vbp->vb_buf);
+
+ /*
+ * kill vbp structure
+ */
+ putvndbuf(vbp);
+
+ /*
+ * wrap up this transaction if it has run to completion or, in
+ * case of an error, when all auxiliary buffers have returned.
+ */
+ if (vnx->vx_error != 0) {
+ /* pass error upward */
+ pbp->b_flags |= B_ERROR;
+ pbp->b_error = vnx->vx_error;
+ if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) {
+ putvndxfer(vnx);
+ biodone(pbp);
+ }
+ } else if (pbp->b_resid == 0) {
+#ifdef DIAGNOSTIC
+ if (vnx->vx_pending != 0)
+ panic("sw_reg_iodone: vnx pending: %d",vnx->vx_pending);
+#endif
+
+ if ((vnx->vx_flags & VX_BUSY) == 0) {
+ UVMHIST_LOG(pdhist, " iodone bp=%p error=%d !",
+ pbp, vnx->vx_error, 0, 0);
+ putvndxfer(vnx);
+ biodone(pbp);
+ }
+ }
+
+ /*
+ * done! start next swapdev I/O if one is pending
+ */
+ sdp->swd_tab.b_active--;
+ sw_reg_start(sdp);
+
+ splx(s);
+}
+#endif /* SWAP_TO_FILES */
+
+
+/*
+ * uvm_swap_alloc: allocate space on swap
+ *
+ * => allocation is done "round robin" down the priority list; as we
+ * allocate within a priority we "rotate" its circle queue.
+ * => space can be freed with uvm_swap_free
+ * => we return the page slot number in /dev/drum (0 == invalid slot)
+ * => we lock swap_data_lock
+ * => XXXMRG: "LESSOK" INTERFACE NEEDED TO EXTENT SYSTEM
+ */
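+/*
+ * e.g. (a sketch with illustrative names): reserve a single drum slot
+ * and release it later; a return value of 0 means no space was found.
+ *
+ *	int nslots = 1, slot;
+ *
+ *	slot = uvm_swap_alloc(&nslots, FALSE);
+ *	if (slot == 0)
+ *		...back off, no swap space...
+ *	...
+ *	uvm_swap_free(slot, nslots);
+ */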
+int
+uvm_swap_alloc(nslots, lessok)
+ int *nslots; /* IN/OUT */
+ boolean_t lessok;
+{
+ struct swapdev *sdp;
+ struct swappri *spp;
+ u_long result;
+ UVMHIST_FUNC("uvm_swap_alloc"); UVMHIST_CALLED(pdhist);
+
+ /*
+ * no swap devices configured yet? definite failure.
+ */
+ if (uvmexp.nswapdev < 1)
+ return 0;
+
+ /*
+ * lock data lock, convert slots into blocks, and enter loop
+ */
+ simple_lock(&swap_data_lock);
+
+ReTry: /* XXXMRG */
+ for (spp = swap_priority.lh_first; spp != NULL;
+ spp = spp->spi_swappri.le_next) {
+ for (sdp = spp->spi_swapdev.cqh_first;
+ sdp != (void *)&spp->spi_swapdev;
+ sdp = sdp->swd_next.cqe_next) {
+ /* if it's not enabled, then we can't swap from it */
+ if ((sdp->swd_flags & SWF_ENABLE) == 0)
+ continue;
+ if (sdp->swd_npginuse + *nslots > sdp->swd_npages)
+ continue;
+ if (extent_alloc(sdp->swd_ex, *nslots, EX_NOALIGN,
+ EX_NOBOUNDARY, EX_MALLOCOK|EX_NOWAIT,
+ &result) != 0) {
+ continue;
+ }
+
+ /*
+ * successful allocation! now rotate the circleq.
+ */
+ CIRCLEQ_REMOVE(&spp->spi_swapdev, sdp, swd_next);
+ CIRCLEQ_INSERT_TAIL(&spp->spi_swapdev, sdp, swd_next);
+ sdp->swd_npginuse += *nslots;
+ uvmexp.swpginuse += *nslots;
+ simple_unlock(&swap_data_lock);
+ /* done! return drum slot number */
+ UVMHIST_LOG(pdhist,
+ "success! returning %d slots starting at %d",
+ *nslots, result + sdp->swd_drumoffset, 0, 0);
+#if 0
+{
+ struct swapdev *sdp2;
+
+ sdp2 = swapdrum_getsdp(result + sdp->swd_drumoffset);
+ if (sdp2 == NULL) {
+printf("uvm_swap_alloc: nslots=%d, dev=%x, drumoff=%d, result=%ld",
+ *nslots, sdp->swd_dev, sdp->swd_drumoffset, result);
+panic("uvm_swap_alloc: allocating unmapped swap block!");
+ }
+}
+#endif
+ return(result + sdp->swd_drumoffset);
+ }
+ }
+
+ /* XXXMRG: BEGIN HACK */
+ if (*nslots > 1 && lessok) {
+ *nslots = 1;
+ goto ReTry; /* XXXMRG: ugh! extent should support this for us */
+ }
+ /* XXXMRG: END HACK */
+
+ simple_unlock(&swap_data_lock);
+ return 0; /* failed */
+}
+
+/*
+ * uvm_swap_free: free swap slots
+ *
+ * => this can be all or part of an allocation made by uvm_swap_alloc
+ * => we lock swap_data_lock
+ */
+void
+uvm_swap_free(startslot, nslots)
+ int startslot;
+ int nslots;
+{
+ struct swapdev *sdp;
+ UVMHIST_FUNC("uvm_swap_free"); UVMHIST_CALLED(pdhist);
+
+ UVMHIST_LOG(pdhist, "freeing %d slots starting at %d", nslots,
+ startslot, 0, 0);
+ /*
+ * convert drum slot offset back to sdp, free the blocks
+ * in the extent, and return. must hold pri lock to do
+ * lookup and access the extent.
+ */
+ simple_lock(&swap_data_lock);
+ sdp = swapdrum_getsdp(startslot);
+
+#ifdef DIAGNOSTIC
+ if (uvmexp.nswapdev < 1)
+ panic("uvm_swap_free: uvmexp.nswapdev < 1\n");
+ if (sdp == NULL) {
+ printf("uvm_swap_free: startslot %d, nslots %d\n", startslot,
+ nslots);
+ panic("uvm_swap_free: unmapped address\n");
+ }
+#endif
+ if (extent_free(sdp->swd_ex, startslot - sdp->swd_drumoffset, nslots,
+ EX_MALLOCOK|EX_NOWAIT) != 0)
+ printf("warning: resource shortage: %d slots of swap lost\n",
+ nslots);
+
+ sdp->swd_npginuse -= nslots;
+ uvmexp.swpginuse -= nslots;
+#ifdef DIAGNOSTIC
+ if (sdp->swd_npginuse < 0)
+ panic("uvm_swap_free: inuse < 0");
+#endif
+ simple_unlock(&swap_data_lock);
+}
+
+/*
+ * uvm_swap_put: put any number of pages into a contig place on swap
+ *
+ * => can be sync or async
+ * => XXXMRG: consider making it an inline or macro
+ */
+int
+uvm_swap_put(swslot, ppsp, npages, flags)
+ int swslot;
+ struct vm_page **ppsp;
+ int npages;
+ int flags;
+{
+ int result;
+
+#if 0
+ flags |= PGO_SYNCIO; /* XXXMRG: tmp, force sync */
+#endif
+
+ result = uvm_swap_io(ppsp, swslot, npages, B_WRITE |
+ ((flags & PGO_SYNCIO) ? 0 : B_ASYNC));
+
+ return (result);
+}
+
+/*
+ * uvm_swap_get: get a single page from swap
+ *
+ * => usually a sync op (from fault)
+ * => XXXMRG: consider making it an inline or macro
+ */
+int
+uvm_swap_get(page, swslot, flags)
+ struct vm_page *page;
+ int swslot, flags;
+{
+ int result;
+
+ uvmexp.nswget++;
+#ifdef DIAGNOSTIC
+ if ((flags & PGO_SYNCIO) == 0)
+ printf("uvm_swap_get: ASYNC get requested?\n");
+#endif
+
+ result = uvm_swap_io(&page, swslot, 1, B_READ |
+ ((flags & PGO_SYNCIO) ? 0 : B_ASYNC));
+
+ return (result);
+}
+
+/*
+ * uvm_swap_io: do an i/o operation to swap
+ */
+
+static int
+uvm_swap_io(pps, startslot, npages, flags)
+ struct vm_page **pps;
+ int startslot, npages, flags;
+{
+ daddr_t startblk;
+ struct swapbuf *sbp;
+ struct buf *bp;
+ vaddr_t kva;
+ int result, s, waitf, pflag;
+ UVMHIST_FUNC("uvm_swap_io"); UVMHIST_CALLED(pdhist);
+
+ UVMHIST_LOG(pdhist, "<- called, startslot=%d, npages=%d, flags=%d",
+ startslot, npages, flags, 0);
+ /*
+ * convert starting drum slot to block number
+ */
+ startblk = btodb(startslot << PAGE_SHIFT);
+
+ /*
+ * first, map the pages into the kernel (XXX: currently required
+ * by buffer system). note that we don't let pagermapin alloc
+ * an aiodesc structure because we don't want to chance a malloc.
+ * we've got our own pool of aiodesc structures (in swapbuf).
+ */
+ waitf = (flags & B_ASYNC) ? M_NOWAIT : M_WAITOK;
+ kva = uvm_pagermapin(pps, npages, NULL, waitf);
+ if (kva == NULL)
+ return (VM_PAGER_AGAIN);
+
+ /*
+ * now allocate a swap buffer from the swapbuf pool
+ * [make sure we don't put the pagedaemon to sleep...]
+ */
+ s = splbio();
+ pflag = ((flags & B_ASYNC) != 0 || curproc == uvm.pagedaemon_proc)
+ ? 0
+ : PR_WAITOK;
+ sbp = pool_get(swapbuf_pool, pflag);
+ splx(s); /* drop splbio */
+
+ /*
+ * if we failed to get a swapbuf, return "try again"
+ */
+ if (sbp == NULL)
+ return (VM_PAGER_AGAIN);
+
+ /*
+ * fill in the bp/sbp. we currently route our i/o through
+ * /dev/drum's vnode [swapdev_vp].
+ */
+ bp = &sbp->sw_buf;
+ bp->b_flags = B_BUSY | B_NOCACHE | (flags & (B_READ|B_ASYNC));
+ bp->b_proc = &proc0; /* XXX */
+ bp->b_rcred = bp->b_wcred = proc0.p_ucred;
+ bp->b_vnbufs.le_next = NOLIST;
+ bp->b_data = (caddr_t)kva;
+ bp->b_blkno = startblk;
+ VHOLD(swapdev_vp);
+ bp->b_vp = swapdev_vp;
+ /* XXXCDC: isn't swapdev_vp always a VCHR? */
+ /* XXXMRG: probably -- this is obviously something inherited... */
+ if (swapdev_vp->v_type == VBLK)
+ bp->b_dev = swapdev_vp->v_rdev;
+ bp->b_bcount = npages << PAGE_SHIFT;
+
+ /*
+ * for pageouts we must set "dirtyoff" [NFS client code needs it].
+ * and we bump v_numoutput (counter of number of active outputs).
+ */
+ if ((bp->b_flags & B_READ) == 0) {
+ bp->b_dirtyoff = 0;
+ bp->b_dirtyend = npages << PAGE_SHIFT;
+ s = splbio();
+ swapdev_vp->v_numoutput++;
+ splx(s);
+ }
+
+ /*
+ * for async ops we must set up the aiodesc and setup the callback
+ * XXX: we expect no async-reads, but we don't prevent it here.
+ */
+ if (flags & B_ASYNC) {
+ sbp->sw_aio.aiodone = uvm_swap_aiodone;
+ sbp->sw_aio.kva = kva;
+ sbp->sw_aio.npages = npages;
+ sbp->sw_aio.pd_ptr = sbp; /* backpointer */
+ bp->b_flags |= B_CALL; /* set callback */
+ bp->b_iodone = uvm_swap_bufdone;/* "buf" iodone function */
+ UVMHIST_LOG(pdhist, "doing async!", 0, 0, 0, 0);
+ }
+ UVMHIST_LOG(pdhist,
+ "about to start io: data = 0x%p blkno = 0x%x, bcount = %ld",
+ bp->b_data, bp->b_blkno, bp->b_bcount, 0);
+
+ /*
+ * now we start the I/O, and if async, return.
+ */
+ VOP_STRATEGY(bp);
+ if (flags & B_ASYNC)
+ return (VM_PAGER_PEND);
+
+ /*
+ * must be sync i/o. wait for it to finish
+ */
+ bp->b_error = biowait(bp);
+ result = (bp->b_flags & B_ERROR) ? VM_PAGER_ERROR : VM_PAGER_OK;
+
+ /*
+ * kill the pager mapping
+ */
+ uvm_pagermapout(kva, npages);
+
+ /*
+ * now dispose of the swap buffer
+ */
+ s = splbio();
+ bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY|B_NOCACHE);
+ if (bp->b_vp)
+ brelvp(bp);
+
+ pool_put(swapbuf_pool, sbp);
+ splx(s);
+
+ /*
+ * finally return.
+ */
+ UVMHIST_LOG(pdhist, "<- done (sync) result=%d", result, 0, 0, 0);
+ return (result);
+}
+
+/*
+ * uvm_swap_bufdone: called from the buffer system when the i/o is done
+ */
+static void
+uvm_swap_bufdone(bp)
+ struct buf *bp;
+{
+ struct swapbuf *sbp = (struct swapbuf *) bp;
+ int s = splbio();
+ UVMHIST_FUNC("uvm_swap_bufdone"); UVMHIST_CALLED(pdhist);
+
+ UVMHIST_LOG(pdhist, "cleaning buf %p", bp, 0, 0, 0);
+#ifdef DIAGNOSTIC
+ /*
+ * sanity check: swapbufs are private, so they shouldn't be wanted
+ */
+ if (bp->b_flags & B_WANTED)
+ panic("uvm_swap_bufdone: private buf wanted");
+#endif
+
+ /*
+ * drop the buffer's reference to the vnode and clear its flags.
+ */
+ bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_PAGET|B_UAREA|B_DIRTY|B_NOCACHE);
+ if (bp->b_vp)
+ brelvp(bp);
+
+ /*
+ * now put the aio on the uvm.aio_done list and wake the
+ * pagedaemon (which will finish up our job in its context).
+ */
+ simple_lock(&uvm.pagedaemon_lock); /* locks uvm.aio_done */
+ TAILQ_INSERT_TAIL(&uvm.aio_done, &sbp->sw_aio, aioq);
+ simple_unlock(&uvm.pagedaemon_lock);
+
+ thread_wakeup(&uvm.pagedaemon);
+ splx(s);
+}
+
+/*
+ * uvm_swap_aiodone: aiodone function for anonymous memory
+ *
+ * => this is called in the context of the pagedaemon (but with the
+ * page queues unlocked!)
+ * => our "aio" structure must be part of a "swapbuf"
+ */
+static void
+uvm_swap_aiodone(aio)
+ struct uvm_aiodesc *aio;
+{
+ struct swapbuf *sbp = aio->pd_ptr;
+ struct vm_page *pps[MAXBSIZE >> PAGE_SHIFT];
+ int lcv, s;
+ vaddr_t addr;
+ UVMHIST_FUNC("uvm_swap_aiodone"); UVMHIST_CALLED(pdhist);
+
+ UVMHIST_LOG(pdhist, "done with aio %p", aio, 0, 0, 0);
+#ifdef DIAGNOSTIC
+ /*
+ * sanity check
+ */
+ if (aio->npages > (MAXBSIZE >> PAGE_SHIFT))
+ panic("uvm_swap_aiodone: aio too big!");
+#endif
+
+ /*
+ * first, we have to recover the page pointers (pps) by poking in the
+ * kernel pmap (XXX: should be saved in the buf structure).
+ */
+ for (addr = aio->kva, lcv = 0 ; lcv < aio->npages ;
+ addr += PAGE_SIZE, lcv++) {
+ pps[lcv] = uvm_pageratop(addr);
+ }
+
+ /*
+ * now we can dispose of the kernel mappings of the buffer
+ */
+ uvm_pagermapout(aio->kva, aio->npages);
+
+ /*
+ * now we can dispose of the pages by using the dropcluster function
+ * [note that we have no "page of interest" so we pass in null]
+ */
+ uvm_pager_dropcluster(NULL, NULL, pps, &aio->npages,
+ PGO_PDFREECLUST, 0);
+
+ /*
+ * finally, we can dispose of the swapbuf
+ */
+ s = splbio();
+ pool_put(swapbuf_pool, sbp);
+ splx(s);
+
+ /*
+ * done!
+ */
+}
diff --git a/sys/uvm/uvm_swap.h b/sys/uvm/uvm_swap.h
new file mode 100644
index 00000000000..008db98b241
--- /dev/null
+++ b/sys/uvm/uvm_swap.h
@@ -0,0 +1,42 @@
+/* $NetBSD: uvm_swap.h,v 1.3 1998/02/07 11:09:48 mrg Exp $ */
+
+/*
+ * Copyright (c) 1997 Matthew R. Green
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: Id: uvm_swap.h,v 1.1.2.6 1997/12/15 05:39:31 mrg Exp
+ */
+
+#ifndef _UVM_UVM_SWAP_H_
+#define _UVM_UVM_SWAP_H_
+
+int uvm_swap_get __P((struct vm_page *, int, int));
+int uvm_swap_put __P((int, struct vm_page **, int,
+ int));
+int uvm_swap_alloc __P((int *wanted, boolean_t lessok));
+void uvm_swap_free __P((int startslot, int nslots));
+
+#endif /* _UVM_UVM_SWAP_H_ */
diff --git a/sys/uvm/uvm_unix.c b/sys/uvm/uvm_unix.c
new file mode 100644
index 00000000000..ed1588491cc
--- /dev/null
+++ b/sys/uvm/uvm_unix.c
@@ -0,0 +1,258 @@
+/* $NetBSD: uvm_unix.c,v 1.7 1998/10/11 23:18:21 chuck Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * Copyright (c) 1991, 1993 The Regents of the University of California.
+ * Copyright (c) 1988 University of Utah.
+ *
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor,
+ * Washington University, the University of California, Berkeley and
+ * its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: Utah $Hdr: vm_unix.c 1.1 89/11/07$
+ * @(#)vm_unix.c 8.1 (Berkeley) 6/11/93
+ * from: Id: uvm_unix.c,v 1.1.2.2 1997/08/25 18:52:30 chuck Exp
+ */
+
+/*
+ * uvm_unix.c: traditional sbrk/grow interface to vm.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+#include <sys/vnode.h>
+#include <sys/core.h>
+
+#include <sys/mount.h>
+#include <sys/syscallargs.h>
+
+#include <vm/vm.h>
+#include <uvm/uvm.h>
+
+
+/*
+ * sys_obreak: set break
+ */
+
+int
+sys_obreak(p, v, retval)
+ struct proc *p;
+ void *v;
+ register_t *retval;
+{
+ struct sys_obreak_args /* {
+ syscallarg(char *) nsize;
+ } */ *uap = v;
+ register struct vmspace *vm = p->p_vmspace;
+ vaddr_t new, old;
+ int rv;
+ register int diff;
+
+ old = (vaddr_t)vm->vm_daddr;
+ new = round_page(SCARG(uap, nsize));
+ if ((int)(new - old) > p->p_rlimit[RLIMIT_DATA].rlim_cur)
+ return(ENOMEM);
+
+ old = round_page(old + ctob(vm->vm_dsize));
+ diff = new - old;
+
+ /*
+ * grow or shrink?
+ */
+
+ if (diff > 0) {
+
+ rv = uvm_map(&vm->vm_map, &old, diff, NULL, UVM_UNKNOWN_OFFSET,
+ UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, UVM_INH_COPY,
+ UVM_ADV_NORMAL, UVM_FLAG_AMAPPAD|UVM_FLAG_FIXED|
+ UVM_FLAG_OVERLAY|UVM_FLAG_COPYONW));
+
+ if (rv != KERN_SUCCESS) {
+ uprintf("sbrk: grow failed, return = %d\n", rv);
+ return(ENOMEM);
+ }
+ vm->vm_dsize += btoc(diff);
+
+ } else if (diff < 0) {
+
+ diff = -diff;
+ rv = uvm_deallocate(&vm->vm_map, new, diff);
+ if (rv != KERN_SUCCESS) {
+ uprintf("sbrk: shrink failed, return = %d\n", rv);
+ return(ENOMEM);
+ }
+ vm->vm_dsize -= btoc(diff);
+
+ }
+ return(0);
+}
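+
+/*
+ * a minimal sketch of the break arithmetic above (hypothetical 4k page
+ * and userland stand-ins, not the kernel macros):
+ *
+ *	#define PGSZ		4096UL
+ *	#define ROUNDPG(x)	(((x) + PGSZ - 1) & ~(PGSZ - 1))
+ *
+ *	// old = page-rounded current end of the data segment,
+ *	// new = page-rounded requested break.
+ *	long diff = (long)(ROUNDPG(new_break) - ROUNDPG(cur_end));
+ *	// diff > 0: map diff bytes at the old end (zero-fill, copy-on-write)
+ *	// diff < 0: unmap -diff bytes starting at the new end
+ */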
+
+/*
+ * uvm_grow: enlarge the "stack segment" to include sp.
+ */
+
+int
+uvm_grow(p, sp)
+ struct proc *p;
+ vaddr_t sp;
+{
+ register struct vmspace *vm = p->p_vmspace;
+ register int si;
+
+ /*
+ * For user defined stacks (from sendsig).
+ */
+ if (sp < (vaddr_t)vm->vm_maxsaddr)
+ return (0);
+
+ /*
+ * For common case of already allocated (from trap).
+ */
+ if (sp >= USRSTACK - ctob(vm->vm_ssize))
+ return (1);
+
+ /*
+ * Really need to check vs limit and increment stack size if ok.
+ */
+ si = clrnd(btoc(USRSTACK-sp) - vm->vm_ssize);
+ if (vm->vm_ssize + si > btoc(p->p_rlimit[RLIMIT_STACK].rlim_cur))
+ return (0);
+ vm->vm_ssize += si;
+ return (1);
+}
+
+/*
+ * sys_ovadvise: old advice system call
+ */
+
+/* ARGSUSED */
+int
+sys_ovadvise(p, v, retval)
+ struct proc *p;
+ void *v;
+ register_t *retval;
+{
+#if 0
+ struct sys_ovadvise_args /* {
+ syscallarg(int) anom;
+ } */ *uap = v;
+#endif
+
+ return (EINVAL);
+}
+
+/*
+ * uvm_coredump: dump core!
+ */
+
+int
+uvm_coredump(p, vp, cred, chdr)
+ struct proc *p;
+ struct vnode *vp;
+ struct ucred *cred;
+ struct core *chdr;
+{
+ register struct vmspace *vm = p->p_vmspace;
+ register vm_map_t map = &vm->vm_map;
+ register vm_map_entry_t entry;
+ vaddr_t start, end;
+ struct coreseg cseg;
+ off_t offset;
+ int flag, error = 0;
+
+ offset = chdr->c_hdrsize + chdr->c_seghdrsize + chdr->c_cpusize;
+
+ for (entry = map->header.next; entry != &map->header;
+ entry = entry->next) {
+
+ /* should never happen for a user process */
+ if (UVM_ET_ISSUBMAP(entry)) {
+ panic("uvm_coredump: user process with submap?");
+ }
+
+ if (!(entry->protection & VM_PROT_WRITE))
+ continue;
+
+ start = entry->start;
+ end = entry->end;
+
+ if (start >= VM_MAXUSER_ADDRESS)
+ continue;
+
+ if (end > VM_MAXUSER_ADDRESS)
+ end = VM_MAXUSER_ADDRESS;
+
+ if (start >= (vaddr_t)vm->vm_maxsaddr) {
+ flag = CORE_STACK;
+ start = trunc_page(USRSTACK - ctob(vm->vm_ssize));
+ if (start >= end)
+ continue;
+ } else
+ flag = CORE_DATA;
+
+ /*
+ * Set up a new core file segment.
+ */
+ CORE_SETMAGIC(cseg, CORESEGMAGIC, CORE_GETMID(*chdr), flag);
+ cseg.c_addr = start;
+ cseg.c_size = end - start;
+
+ error = vn_rdwr(UIO_WRITE, vp,
+ (caddr_t)&cseg, chdr->c_seghdrsize,
+ offset, UIO_SYSSPACE,
+ IO_NODELOCKED|IO_UNIT, cred, NULL, p);
+ if (error)
+ break;
+
+ offset += chdr->c_seghdrsize;
+ error = vn_rdwr(UIO_WRITE, vp,
+ (caddr_t)cseg.c_addr, (int)cseg.c_size,
+ offset, UIO_USERSPACE,
+ IO_NODELOCKED|IO_UNIT, cred, NULL, p);
+ if (error)
+ break;
+
+ offset += cseg.c_size;
+ chdr->c_nseg++;
+ }
+
+ return (error);
+}
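+
+/*
+ * sketch of the segment-dump pattern above (userland approximation with
+ * pwrite(2) and hypothetical names; the kernel uses vn_rdwr() instead):
+ *
+ *	off_t off = hdrsize;			// past the core/cpu headers
+ *	// for each writable mapping: emit a fixed-size segment header,
+ *	// then the raw segment contents, advancing the file offset.
+ *	pwrite(fd, &cseg, sizeof(cseg), off);
+ *	off += sizeof(cseg);
+ *	pwrite(fd, (void *)cseg.c_addr, cseg.c_size, off);
+ *	off += cseg.c_size;
+ *	nseg++;
+ */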
+
diff --git a/sys/uvm/uvm_user.c b/sys/uvm/uvm_user.c
new file mode 100644
index 00000000000..e3c328298b7
--- /dev/null
+++ b/sys/uvm/uvm_user.c
@@ -0,0 +1,72 @@
+/* $NetBSD: uvm_user.c,v 1.6 1998/10/11 23:18:21 chuck Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ *
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * from: Id: uvm_user.c,v 1.1.2.1 1997/08/14 19:10:41 chuck Exp
+ */
+
+/*
+ * uvm_user.c: high level uvm_allocate/uvm_deallocate interface into vm.
+ */
+
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+
+#include <vm/vm.h>
+#include <uvm/uvm.h>
+
+/*
+ * uvm_deallocate: deallocate memory (unmap)
+ */
+
+int
+uvm_deallocate(map, start, size)
+ vm_map_t map;
+ vaddr_t start;
+ vsize_t size;
+{
+
+ if (map == NULL)
+ panic("uvm_deallocate with null map");
+
+ if (size == (vaddr_t) 0)
+ return (KERN_SUCCESS);
+
+ return(uvm_unmap(map, trunc_page(start), round_page(start+size)));
+
+}
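+
+/*
+ * note on the rounding above: the byte range [start, start+size) is
+ * widened outward to page boundaries before unmapping.  worked example,
+ * assuming a hypothetical 4k page:
+ *
+ *	start = 0x1234, size = 0x100
+ *	trunc_page(0x1234)         = 0x1000
+ *	round_page(0x1234 + 0x100) = 0x2000
+ *	=> the whole page at 0x1000 is unmapped
+ */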
diff --git a/sys/uvm/uvm_vnode.c b/sys/uvm/uvm_vnode.c
new file mode 100644
index 00000000000..154c009b2d0
--- /dev/null
+++ b/sys/uvm/uvm_vnode.c
@@ -0,0 +1,2067 @@
+/* $NetBSD: uvm_vnode.c,v 1.18 1999/01/29 12:56:17 bouyer Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California.
+ * Copyright (c) 1990 University of Utah.
+ *
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor,
+ * Washington University, the University of California, Berkeley and
+ * its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vnode_pager.c 8.8 (Berkeley) 2/13/94
+ * from: Id: uvm_vnode.c,v 1.1.2.26 1998/02/02 20:38:07 chuck Exp
+ */
+
+/*
+ * uvm_vnode.c: the vnode pager.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/vnode.h>
+#include <sys/disklabel.h>
+#include <sys/ioctl.h>
+#include <sys/fcntl.h>
+#include <sys/conf.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_kern.h>
+
+#include <uvm/uvm.h>
+#include <uvm/uvm_vnode.h>
+
+/*
+ * private global data structure
+ *
+ * we keep a list of writeable active vnode-backed VM objects for sync op.
+ * we keep a simpleq of vnodes that are currently being sync'd.
+ */
+
+LIST_HEAD(uvn_list_struct, uvm_vnode);
+static struct uvn_list_struct uvn_wlist; /* writeable uvns */
+static simple_lock_data_t uvn_wl_lock; /* locks uvn_wlist */
+
+SIMPLEQ_HEAD(uvn_sq_struct, uvm_vnode);
+static struct uvn_sq_struct uvn_sync_q; /* sync'ing uvns */
+lock_data_t uvn_sync_lock; /* locks sync operation */
+
+/*
+ * functions
+ */
+
+static int uvn_asyncget __P((struct uvm_object *, vaddr_t,
+ int));
+struct uvm_object *uvn_attach __P((void *, vm_prot_t));
+static void uvn_cluster __P((struct uvm_object *, vaddr_t,
+ vaddr_t *, vaddr_t *));
+static void uvn_detach __P((struct uvm_object *));
+static boolean_t uvn_flush __P((struct uvm_object *, vaddr_t,
+ vaddr_t, int));
+static int uvn_get __P((struct uvm_object *, vaddr_t,
+ vm_page_t *, int *, int,
+ vm_prot_t, int, int));
+static void uvn_init __P((void));
+static int uvn_io __P((struct uvm_vnode *, vm_page_t *,
+ int, int, int));
+static int uvn_put __P((struct uvm_object *, vm_page_t *,
+ int, boolean_t));
+static void uvn_reference __P((struct uvm_object *));
+static boolean_t uvn_releasepg __P((struct vm_page *,
+ struct vm_page **));
+
+/*
+ * master pager structure
+ */
+
+struct uvm_pagerops uvm_vnodeops = {
+ uvn_init,
+ uvn_attach,
+ uvn_reference,
+ uvn_detach,
+ NULL, /* no specialized fault routine required */
+ uvn_flush,
+ uvn_get,
+ uvn_asyncget,
+ uvn_put,
+ uvn_cluster,
+ uvm_mk_pcluster, /* use generic version of this: see uvm_pager.c */
+ uvm_shareprot, /* !NULL: allow us in share maps */
+ NULL, /* AIO-DONE function (not until we have asyncio) */
+ uvn_releasepg,
+};
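+
+/*
+ * the table above is the standard "ops vector" pattern: a struct of
+ * function pointers that lets generic pager code dispatch to
+ * vnode-specific routines.  a hedged, stand-alone sketch of the idea
+ * (hypothetical names, not the real uvm_pagerops layout):
+ *
+ *	static void my_init(void);
+ *	static int my_get(void *obj, long off);
+ *	static int my_put(void *obj, long off);
+ *
+ *	struct pager_ops {
+ *		void	(*init)(void);
+ *		int	(*get)(void *, long);
+ *		int	(*put)(void *, long);
+ *	};
+ *
+ *	static const struct pager_ops my_vnode_ops = {
+ *		my_init, my_get, my_put,
+ *	};
+ *
+ *	// generic code then calls ops->get(obj, off) without knowing
+ *	// which pager backs the object.
+ */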
+
+/*
+ * the ops!
+ */
+
+/*
+ * uvn_init
+ *
+ * init pager private data structures.
+ */
+
+static void
+uvn_init()
+{
+
+ LIST_INIT(&uvn_wlist);
+ simple_lock_init(&uvn_wl_lock);
+ /* note: uvn_sync_q init'd in uvm_vnp_sync() */
+ lockinit(&uvn_sync_lock, PVM, "uvnsync", 0, 0);
+}
+
+/*
+ * uvn_attach
+ *
+ * attach a vnode structure to a VM object. if the vnode is already
+ * attached, then just bump the reference count by one and return the
+ * VM object. if not already attached, attach and return the new VM obj.
+ * the "accessprot" tells the max access the attaching thread wants to
+ * our pages.
+ *
+ * => caller must _not_ already be holding the lock on the uvm_object.
+ * => in fact, nothing should be locked so that we can sleep here.
+ * => note that uvm_object is first thing in vnode structure, so their
+ * pointers are equiv.
+ */
+
+struct uvm_object *
+uvn_attach(arg, accessprot)
+ void *arg;
+ vm_prot_t accessprot;
+{
+ struct vnode *vp = arg;
+ struct uvm_vnode *uvn = &vp->v_uvm;
+ struct vattr vattr;
+ int oldflags, result;
+ struct partinfo pi;
+ u_quad_t used_vnode_size;
+ UVMHIST_FUNC("uvn_attach"); UVMHIST_CALLED(maphist);
+
+ UVMHIST_LOG(maphist, "(vn=0x%x)", arg,0,0,0);
+
+ used_vnode_size = (u_quad_t)0; /* XXX gcc -Wuninitialized */
+
+ /*
+ * first get a lock on the uvn.
+ */
+ simple_lock(&uvn->u_obj.vmobjlock);
+ while (uvn->u_flags & UVM_VNODE_BLOCKED) {
+ uvn->u_flags |= UVM_VNODE_WANTED;
+ UVMHIST_LOG(maphist, " SLEEPING on blocked vn",0,0,0,0);
+ UVM_UNLOCK_AND_WAIT(uvn, &uvn->u_obj.vmobjlock, FALSE,
+ "uvn_attach", 0);
+ simple_lock(&uvn->u_obj.vmobjlock);
+ UVMHIST_LOG(maphist," WOKE UP",0,0,0,0);
+ }
+
+ /*
+ * if we're mapping a BLK device, make sure it is a disk.
+ */
+ if (vp->v_type == VBLK && bdevsw[major(vp->v_rdev)].d_type != D_DISK) {
+ simple_unlock(&uvn->u_obj.vmobjlock); /* drop lock */
+ UVMHIST_LOG(maphist,"<- done (VBLK not D_DISK!)", 0,0,0,0);
+ return(NULL);
+ }
+
+ /*
+ * now we have lock and uvn must not be in a blocked state.
+ * first check to see if it is already active, in which case
+ * we can bump the reference count, check to see if we need to
+ * add it to the writeable list, and then return.
+ */
+ if (uvn->u_flags & UVM_VNODE_VALID) { /* already active? */
+
+ /* regain VREF if we were persisting */
+ if (uvn->u_obj.uo_refs == 0) {
+ VREF(vp);
+ UVMHIST_LOG(maphist," VREF (reclaim persisting vnode)",
+ 0,0,0,0);
+ }
+ uvn->u_obj.uo_refs++; /* bump uvn ref! */
+
+ /* check for new writeable uvn */
+ if ((accessprot & VM_PROT_WRITE) != 0 &&
+ (uvn->u_flags & UVM_VNODE_WRITEABLE) == 0) {
+ simple_lock(&uvn_wl_lock);
+ LIST_INSERT_HEAD(&uvn_wlist, uvn, u_wlist);
+ simple_unlock(&uvn_wl_lock);
+ /* we are now on wlist! */
+ uvn->u_flags |= UVM_VNODE_WRITEABLE;
+ }
+
+ /* unlock and return */
+ simple_unlock(&uvn->u_obj.vmobjlock);
+ UVMHIST_LOG(maphist,"<- done, refcnt=%d", uvn->u_obj.uo_refs,
+ 0, 0, 0);
+ return (&uvn->u_obj);
+ }
+
+ /*
+ * need to call VOP_GETATTR() to get the attributes, but that could
+ * block (due to I/O), so we want to unlock the object before calling.
+ * however, we want to keep anyone else from playing with the object
+ * while it is unlocked. to do this we set UVM_VNODE_ALOCK which
+ * prevents anyone from attaching to the vnode until we are done with
+ * it.
+ */
+ uvn->u_flags = UVM_VNODE_ALOCK;
+ simple_unlock(&uvn->u_obj.vmobjlock); /* drop lock in case we sleep */
+ /* XXX: curproc? */
+
+ if (vp->v_type == VBLK) {
+ /*
+ * We could implement this as a specfs getattr call, but:
+ *
+ * (1) VOP_GETATTR() would get the file system
+ * vnode operation, not the specfs operation.
+ *
+ * (2) All we want is the size, anyhow.
+ */
+ result = (*bdevsw[major(vp->v_rdev)].d_ioctl)(vp->v_rdev,
+ DIOCGPART, (caddr_t)&pi, FREAD, curproc);
+ if (result == 0) {
+ /* XXX should remember blocksize */
+ used_vnode_size = (u_quad_t)pi.disklab->d_secsize *
+ (u_quad_t)pi.part->p_size;
+ }
+ } else {
+ result = VOP_GETATTR(vp, &vattr, curproc->p_ucred, curproc);
+ if (result == 0)
+ used_vnode_size = vattr.va_size;
+ }
+
+ /* relock object */
+ simple_lock(&uvn->u_obj.vmobjlock);
+
+ if (result != 0) {
+ if (uvn->u_flags & UVM_VNODE_WANTED)
+ wakeup(uvn);
+ uvn->u_flags = 0;
+ simple_unlock(&uvn->u_obj.vmobjlock); /* drop lock */
+ UVMHIST_LOG(maphist,"<- done (VOP_GETATTR FAILED!)", 0,0,0,0);
+ return(NULL);
+ }
+
+ /*
+ * make sure that the newsize fits within a vaddr_t
+ * XXX: need to revise addressing data types
+ */
+#ifdef DEBUG
+ if (vp->v_type == VBLK)
+ printf("used_vnode_size = %qu\n", used_vnode_size);
+#endif
+ if (used_vnode_size > (vaddr_t) -PAGE_SIZE) {
+#ifdef DEBUG
+ printf("uvn_attach: vn %p size truncated %qx->%x\n", vp,
+ used_vnode_size, -PAGE_SIZE);
+#endif
+ used_vnode_size = (vaddr_t) -PAGE_SIZE;
+ }
+
+ /*
+ * now set up the uvn.
+ */
+ uvn->u_obj.pgops = &uvm_vnodeops;
+ TAILQ_INIT(&uvn->u_obj.memq);
+ uvn->u_obj.uo_npages = 0;
+ uvn->u_obj.uo_refs = 1; /* just us... */
+ oldflags = uvn->u_flags;
+ uvn->u_flags = UVM_VNODE_VALID|UVM_VNODE_CANPERSIST;
+ uvn->u_nio = 0;
+ uvn->u_size = used_vnode_size;
+
+ /* if write access, we need to add it to the wlist */
+ if (accessprot & VM_PROT_WRITE) {
+ simple_lock(&uvn_wl_lock);
+ LIST_INSERT_HEAD(&uvn_wlist, uvn, u_wlist);
+ simple_unlock(&uvn_wl_lock);
+ uvn->u_flags |= UVM_VNODE_WRITEABLE; /* we are on wlist! */
+ }
+
+ /*
+ * add a reference to the vnode. this reference will stay as long
+ * as there is a valid mapping of the vnode. dropped when the
+ * reference count goes to zero [and we either free or persist].
+ */
+ VREF(vp);
+ simple_unlock(&uvn->u_obj.vmobjlock);
+ if (oldflags & UVM_VNODE_WANTED)
+ wakeup(uvn);
+
+ UVMHIST_LOG(maphist,"<- done/VREF, ret 0x%x", &uvn->u_obj,0,0,0);
+ return(&uvn->u_obj);
+}
+
+
+/*
+ * uvn_reference
+ *
+ * duplicate a reference to a VM object. Note that the reference
+ * count must already be at least one (the passed in reference) so
+ * there is no chance of the uvn being killed or locked out here.
+ *
+ * => caller must call with object unlocked.
+ * => caller must be using the same accessprot as was used at attach time
+ */
+
+
+static void
+uvn_reference(uobj)
+ struct uvm_object *uobj;
+{
+#ifdef DIAGNOSTIC
+ struct uvm_vnode *uvn = (struct uvm_vnode *) uobj;
+#endif
+ UVMHIST_FUNC("uvn_reference"); UVMHIST_CALLED(maphist);
+
+ simple_lock(&uobj->vmobjlock);
+#ifdef DIAGNOSTIC
+ if ((uvn->u_flags & UVM_VNODE_VALID) == 0) {
+ printf("uvn_reference: ref=%d, flags=0x%x\n", uvn->u_flags,
+ uobj->uo_refs);
+ panic("uvn_reference: invalid state");
+ }
+#endif
+ uobj->uo_refs++;
+ UVMHIST_LOG(maphist, "<- done (uobj=0x%x, ref = %d)",
+ uobj, uobj->uo_refs,0,0);
+ simple_unlock(&uobj->vmobjlock);
+}
+
+/*
+ * uvn_detach
+ *
+ * remove a reference to a VM object.
+ *
+ * => caller must call with object unlocked and map locked.
+ * => this starts the detach process, but doesn't have to finish it
+ * (async i/o could still be pending).
+ */
+static void
+uvn_detach(uobj)
+ struct uvm_object *uobj;
+{
+ struct uvm_vnode *uvn;
+ struct vnode *vp;
+ int oldflags;
+ UVMHIST_FUNC("uvn_detach"); UVMHIST_CALLED(maphist);
+
+ simple_lock(&uobj->vmobjlock);
+
+ UVMHIST_LOG(maphist," (uobj=0x%x) ref=%d", uobj,uobj->uo_refs,0,0);
+ uobj->uo_refs--; /* drop ref! */
+ if (uobj->uo_refs) { /* still more refs */
+ simple_unlock(&uobj->vmobjlock);
+ UVMHIST_LOG(maphist, "<- done (rc>0)", 0,0,0,0);
+ return;
+ }
+
+ /*
+ * get other pointers ...
+ */
+
+ uvn = (struct uvm_vnode *) uobj;
+ vp = (struct vnode *) uobj;
+
+ /*
+ * clear VTEXT flag now that there are no mappings left (VTEXT is used
+ * to keep an active text file from being overwritten).
+ */
+ vp->v_flag &= ~VTEXT;
+
+ /*
+ * we just dropped the last reference to the uvn. see if we can
+ * let it "stick around".
+ */
+
+ if (uvn->u_flags & UVM_VNODE_CANPERSIST) {
+ /* won't block */
+ uvn_flush(uobj, 0, 0, PGO_DEACTIVATE|PGO_ALLPAGES);
+ simple_unlock(&uobj->vmobjlock);
+ vrele(vp); /* drop vnode reference */
+ UVMHIST_LOG(maphist,"<- done/vrele! (persist)", 0,0,0,0);
+ return;
+ }
+
+ /*
+ * its a goner!
+ */
+
+ UVMHIST_LOG(maphist," its a goner (flushing)!", 0,0,0,0);
+
+ uvn->u_flags |= UVM_VNODE_DYING;
+
+ /*
+ * even though we may unlock in flush, no one can gain a reference
+ * to us until we clear the "dying" flag [because it blocks
+ * attaches]. we will not do that until after we've disposed of all
+ * the pages with uvn_flush(). note that before the flush the only
+ * pages that could be marked PG_BUSY are ones that are in async
+ * pageout by the daemon. (there can't be any pending "get"'s
+ * because there are no references to the object).
+ */
+
+ (void) uvn_flush(uobj, 0, 0, PGO_CLEANIT|PGO_FREE|PGO_ALLPAGES);
+
+ UVMHIST_LOG(maphist," its a goner (done flush)!", 0,0,0,0);
+
+ /*
+ * given the structure of this pager, the above flush request will
+ * create the following state: all the pages that were in the object
+ * have either been free'd or they are marked PG_BUSY|PG_RELEASED.
+ * the PG_BUSY bit was set either by us or the daemon for async I/O.
+ * in either case, if we have pages left we can't kill the object
+ * yet because i/o is pending. in this case we set the "relkill"
+ * flag which will cause pgo_releasepg to kill the object once all
+ * the I/O's are done [pgo_releasepg will be called from the aiodone
+ * routine or from the page daemon].
+ */
+
+ if (uobj->uo_npages) { /* I/O pending. iodone will free */
+#ifdef DIAGNOSTIC
+ /*
+ * XXXCDC: very unlikely to happen until we have async i/o
+ * so print a little info message in case it does.
+ */
+ printf("uvn_detach: vn %p has pages left after flush - "
+ "relkill mode\n", uobj);
+#endif
+ uvn->u_flags |= UVM_VNODE_RELKILL;
+ simple_unlock(&uobj->vmobjlock);
+ UVMHIST_LOG(maphist,"<- done! (releasepg will kill obj)", 0, 0,
+ 0, 0);
+ return;
+ }
+
+ /*
+ * kill object now. note that we can't be on the sync q because
+ * all references are gone.
+ */
+ if (uvn->u_flags & UVM_VNODE_WRITEABLE) {
+ simple_lock(&uvn_wl_lock); /* protect uvn_wlist */
+ LIST_REMOVE(uvn, u_wlist);
+ simple_unlock(&uvn_wl_lock);
+ }
+#ifdef DIAGNOSTIC
+ if (uobj->memq.tqh_first != NULL)
+ panic("uvn_deref: vnode VM object still has pages afer "
+ "syncio/free flush");
+#endif
+ oldflags = uvn->u_flags;
+ uvn->u_flags = 0;
+ simple_unlock(&uobj->vmobjlock);
+
+ /* wake up any sleepers */
+ if (oldflags & UVM_VNODE_WANTED)
+ wakeup(uvn);
+
+ /*
+ * drop our reference to the vnode.
+ */
+ vrele(vp);
+ UVMHIST_LOG(maphist,"<- done (vrele) final", 0,0,0,0);
+
+ return;
+}
+
+/*
+ * uvm_vnp_terminate: external hook to clear out a vnode's VM
+ *
+ * called in two cases:
+ * [1] when a persisting vnode vm object (i.e. one with a zero reference
+ * count) needs to be freed so that a vnode can be reused. this
+ * happens under "getnewvnode" in vfs_subr.c. if the vnode from
+ * the free list is still attached (i.e. not VBAD) then vgone is
+ * called. as part of the vgone trace this should get called to
+ * free the vm object. this is the common case.
+ * [2] when a filesystem is being unmounted by force (MNT_FORCE,
+ * "umount -f") the vgone() function is called on active vnodes
+ * on the mounted file systems to kill their data (the vnodes become
+ * "dead" ones [see src/sys/miscfs/deadfs/...]). that results in a
+ * call here (even if the uvn is still in use -- i.e. has a non-zero
+ * reference count). this case happens at "umount -f" and during a
+ * "reboot/halt" operation.
+ *
+ * => the caller must XLOCK and VOP_LOCK the vnode before calling us
+ * [protects us from getting a vnode that is already in the DYING
+ * state...]
+ * => unlike uvn_detach, this function must not return until all the
+ * uvn's pages are disposed of.
+ * => in case [2] the uvn is still alive after this call, but all I/O
+ * ops will fail (due to the backing vnode now being "dead"). this
+ * will prob. kill any process using the uvn due to pgo_get failing.
+ */
+
+void
+uvm_vnp_terminate(vp)
+ struct vnode *vp;
+{
+ struct uvm_vnode *uvn = &vp->v_uvm;
+ int oldflags;
+ UVMHIST_FUNC("uvm_vnp_terminate"); UVMHIST_CALLED(maphist);
+
+ /*
+ * lock object and check if it is valid
+ */
+ simple_lock(&uvn->u_obj.vmobjlock);
+ UVMHIST_LOG(maphist, " vp=0x%x, ref=%d, flag=0x%x", vp,
+ uvn->u_obj.uo_refs, uvn->u_flags, 0);
+ if ((uvn->u_flags & UVM_VNODE_VALID) == 0) {
+ simple_unlock(&uvn->u_obj.vmobjlock);
+ UVMHIST_LOG(maphist, "<- done (not active)", 0, 0, 0, 0);
+ return;
+ }
+
+ /*
+ * must be a valid uvn that is not already dying (because XLOCK
+ * protects us from that). the uvn can't be in the ALOCK state
+ * because it is valid, and uvn's that are in the ALOCK state haven't
+ * been marked valid yet.
+ */
+
+#ifdef DEBUG
+ /*
+ * debug check: are we yanking the vnode out from under our uvn?
+ */
+ if (uvn->u_obj.uo_refs) {
+ printf("uvm_vnp_terminate(%p): terminating active vnode "
+ "(refs=%d)\n", uvn, uvn->u_obj.uo_refs);
+ }
+#endif
+
+ /*
+ * it is possible that the uvn was detached and is in the relkill
+ * state [i.e. waiting for async i/o to finish so that releasepg can
+ * kill object]. we take over the vnode now and cancel the relkill.
+ * we want to know when the i/o is done so we can recycle right
+ * away. note that a uvn can only be in the RELKILL state if it
+ * has a zero reference count.
+ */
+
+ if (uvn->u_flags & UVM_VNODE_RELKILL)
+ uvn->u_flags &= ~UVM_VNODE_RELKILL; /* cancel RELKILL */
+
+ /*
+ * block the uvn by setting the dying flag, and then flush the
+ * pages. (note that flush may unlock object while doing I/O, but
+ * it will re-lock it before it returns control here).
+ *
+ * also, note that we tell I/O that we are already VOP_LOCK'd so
+ * that uvn_io doesn't attempt to VOP_LOCK again.
+ *
+ * XXXCDC: setting VNISLOCKED on an active uvn which is being terminated
+ * due to a forceful unmount might not be a good idea. maybe we
+ * need a way to pass in this info to uvn_flush through a
+ * pager-defined PGO_ constant [currently there are none].
+ */
+ uvn->u_flags |= UVM_VNODE_DYING|UVM_VNODE_VNISLOCKED;
+
+ (void) uvn_flush(&uvn->u_obj, 0, 0, PGO_CLEANIT|PGO_FREE|PGO_ALLPAGES);
+
+ /*
+ * as we just did a flush we expect all the pages to be gone or in
+ * the process of going. sleep to wait for the rest to go [via iosync].
+ */
+
+ while (uvn->u_obj.uo_npages) {
+#ifdef DIAGNOSTIC
+ struct vm_page *pp;
+ for (pp = uvn->u_obj.memq.tqh_first ; pp != NULL ;
+ pp = pp->listq.tqe_next) {
+ if ((pp->flags & PG_BUSY) == 0)
+ panic("uvm_vnp_terminate: detected unbusy pg");
+ }
+ if (uvn->u_nio == 0)
+ panic("uvm_vnp_terminate: no I/O to wait for?");
+ printf("uvm_vnp_terminate: waiting for I/O to fin.\n");
+ /*
+ * XXXCDC: this is unlikely to happen without async i/o so we
+ * put a printf in just to keep an eye on it.
+ */
+#endif
+ uvn->u_flags |= UVM_VNODE_IOSYNC;
+ UVM_UNLOCK_AND_WAIT(&uvn->u_nio, &uvn->u_obj.vmobjlock, FALSE,
+ "uvn_term",0);
+ simple_lock(&uvn->u_obj.vmobjlock);
+ }
+
+ /*
+ * done. now we free the uvn if its reference count is zero
+ * (true if we are zapping a persisting uvn). however, if we are
+ * terminating a uvn with active mappings we let it live ... future
+ * calls down to the vnode layer will fail.
+ */
+
+ oldflags = uvn->u_flags;
+ if (uvn->u_obj.uo_refs) {
+
+ /*
+ * uvn must live on in its dead-vnode state until all references
+ * are gone. restore flags. clear CANPERSIST state.
+ */
+
+ uvn->u_flags &= ~(UVM_VNODE_DYING|UVM_VNODE_VNISLOCKED|
+ UVM_VNODE_WANTED|UVM_VNODE_CANPERSIST);
+
+ } else {
+
+ /*
+ * free the uvn now. note that the VREF reference is already
+ * gone [it is dropped when we enter the persist state].
+ */
+ if (uvn->u_flags & UVM_VNODE_IOSYNCWANTED)
+ panic("uvm_vnp_terminate: io sync wanted bit set");
+
+ if (uvn->u_flags & UVM_VNODE_WRITEABLE) {
+ simple_lock(&uvn_wl_lock);
+ LIST_REMOVE(uvn, u_wlist);
+ simple_unlock(&uvn_wl_lock);
+ }
+ uvn->u_flags = 0; /* uvn is history, clear all bits */
+ }
+
+ if (oldflags & UVM_VNODE_WANTED)
+ wakeup(uvn); /* object lock still held */
+
+ simple_unlock(&uvn->u_obj.vmobjlock);
+ UVMHIST_LOG(maphist, "<- done", 0, 0, 0, 0);
+
+}
+
+/*
+ * uvn_releasepg: handle a released page in a uvn
+ *
+ * => "pg" is a PG_BUSY [caller owns it], PG_RELEASED page that we need
+ * to dispose of.
+ * => caller must handle the PG_WANTED case
+ * => called with page's object locked, pageq's unlocked
+ * => returns TRUE if page's object is still alive, FALSE if we
+ * killed the page's object. if we return TRUE, then we
+ * return with the object locked.
+ * => if (nextpgp != NULL) => we return pageq.tqe_next here, and return
+ * with the page queues locked [for pagedaemon]
+ * => if (nextpgp == NULL) => we return with page queues unlocked [normal case]
+ * => we kill the uvn if it is not referenced and we are supposed to
+ * kill it ("relkill").
+ */
+
+boolean_t
+uvn_releasepg(pg, nextpgp)
+ struct vm_page *pg;
+ struct vm_page **nextpgp; /* OUT */
+{
+ struct uvm_vnode *uvn = (struct uvm_vnode *) pg->uobject;
+#ifdef DIAGNOSTIC
+ if ((pg->flags & PG_RELEASED) == 0)
+ panic("uvn_releasepg: page not released!");
+#endif
+
+ /*
+ * dispose of the page [caller handles PG_WANTED]
+ */
+ pmap_page_protect(PMAP_PGARG(pg), VM_PROT_NONE);
+ uvm_lock_pageq();
+ if (nextpgp)
+ *nextpgp = pg->pageq.tqe_next; /* next page for daemon */
+ uvm_pagefree(pg);
+ if (!nextpgp)
+ uvm_unlock_pageq();
+
+ /*
+ * now see if we need to kill the object
+ */
+ if (uvn->u_flags & UVM_VNODE_RELKILL) {
+ if (uvn->u_obj.uo_refs)
+ panic("uvn_releasepg: kill flag set on referenced "
+ "object!");
+ if (uvn->u_obj.uo_npages == 0) {
+ if (uvn->u_flags & UVM_VNODE_WRITEABLE) {
+ simple_lock(&uvn_wl_lock);
+ LIST_REMOVE(uvn, u_wlist);
+ simple_unlock(&uvn_wl_lock);
+ }
+#ifdef DIAGNOSTIC
+ if (uvn->u_obj.memq.tqh_first)
+ panic("uvn_releasepg: pages in object with npages == 0");
+#endif
+ if (uvn->u_flags & UVM_VNODE_WANTED)
+ /* still holding object lock */
+ wakeup(uvn);
+
+ uvn->u_flags = 0; /* DEAD! */
+ simple_unlock(&uvn->u_obj.vmobjlock);
+ return (FALSE);
+ }
+ }
+ return (TRUE);
+}
+
+/*
+ * NOTE: currently we have to use VOP_READ/VOP_WRITE because they go
+ * through the buffer cache and allow I/O in any size. These VOPs use
+ * synchronous i/o. [vs. VOP_STRATEGY which can be async, but doesn't
+ * go through the buffer cache or allow I/O sizes larger than a
+ * block]. we will eventually want to change this.
+ *
+ * issues to consider:
+ * uvm provides the uvm_aiodesc structure for async i/o management.
+ * there are two tailq's in the uvm. structure... one for pending async
+ * i/o and one for "done" async i/o. to do an async i/o one puts
+ * an aiodesc on the "pending" list (protected by splbio()), starts the
+ * i/o and returns VM_PAGER_PEND. when the i/o is done, we expect
+ * some sort of "i/o done" function to be called (at splbio(), interrupt
+ * time). this function should remove the aiodesc from the pending list
+ * and place it on the "done" list and wakeup the daemon. the daemon
+ * will run at normal spl() and will remove all items from the "done"
+ * list and call the "aiodone" hook for each done request (see uvm_pager.c).
+ * [in the old vm code, this was done by calling the "put" routine with
+ * null arguments which made the code harder to read and understand because
+ * you had one function ("put") doing two things.]
+ *
+ * so the current pager needs:
+ * int uvn_aiodone(struct uvm_aiodesc *)
+ *
+ * => return KERN_SUCCESS (aio finished, free it). otherwise requeue for
+ * later collection.
+ * => called with pageq's locked by the daemon.
+ *
+ * general outline:
+ * - "try" to lock object. if fail, just return (will try again later)
+ * - drop "u_nio" (this req is done!)
+ * - if (object->iosync && u_naio == 0) { wakeup &uvn->u_naio }
+ * - get "page" structures (atop?).
+ * - handle "wanted" pages
+ * - handle "released" pages [using pgo_releasepg]
+ * >>> pgo_releasepg may kill the object
+ * don't forget to look at the "object" wanted flag in all cases.
+ */
+
+
+/*
+ * uvn_flush: flush pages out of a uvm object.
+ *
+ * => object should be locked by caller. we may _unlock_ the object
+ * if (and only if) we need to clean a page (PGO_CLEANIT).
+ * we return with the object locked.
+ * => if PGO_CLEANIT is set, we may block (due to I/O). thus, a caller
+ * might want to unlock higher level resources (e.g. vm_map)
+ * before calling flush.
+ * => if PGO_CLEANIT is not set, then we will neither unlock the object
+ * nor block.
+ * => if PGO_ALLPAGES is set, then all pages in the object are valid targets
+ * for flushing.
+ * => NOTE: we rely on the fact that the object's memq is a TAILQ and
+ * that new pages are inserted on the tail end of the list. thus,
+ * we can make a complete pass through the object in one go by starting
+ * at the head and working towards the tail (new pages are put in
+ * front of us).
+ * => NOTE: we are allowed to lock the page queues, so the caller
+ * must not be holding the lock on them [e.g. pagedaemon had
+ * better not call us with the queues locked]
+ * => we return TRUE unless we encountered some sort of I/O error
+ *
+ * comment on "cleaning" object and PG_BUSY pages:
+ * this routine is holding the lock on the object. the only time
+ * that it can run into a PG_BUSY page that it does not own is if
+ * some other process has started I/O on the page (e.g. either
+ * a pagein, or a pageout). if the PG_BUSY page is being paged
+ * in, then it can not be dirty (!PG_CLEAN) because no one has
+ * had a chance to modify it yet. if the PG_BUSY page is being
+ * paged out then it means that someone else has already started
+ * cleaning the page for us (how nice!). in this case, if we
+ * have syncio specified, then after we make our pass through the
+ * object we need to wait for the other PG_BUSY pages to clear
+ * off (i.e. we need to do an iosync). also note that once a
+ * page is PG_BUSY it must stay in its object until it is un-busyed.
+ *
+ * note on page traversal:
+ * we can traverse the pages in an object either by going down the
+ * linked list in "uobj->memq", or we can go over the address range
+ * by page doing hash table lookups for each address. depending
+ * on how many pages are in the object it may be cheaper to do one
+ * or the other. we set "by_list" to true if we are using memq.
+ * if the cost of a hash lookup was equal to the cost of the list
+ * traversal we could compare the number of pages in the start->stop
+ * range to the total number of pages in the object. however, it
+ * seems that a hash table lookup is more expensive than the linked
+ * list traversal, so we multiply the number of pages in the
+ * start->stop range by a penalty which we define below.
+ */
+
+#define UVN_HASH_PENALTY 4 /* XXX: a guess */
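+
+/*
+ * hedged worked example of the list-vs-hash decision made in uvn_flush()
+ * below (same arithmetic, hypothetical numbers):
+ *
+ *	pages in flush range = (stop - start) >> PAGE_SHIFT = 16
+ *	pages resident in object (uo_npages)               = 80
+ *	80 > 16 * UVN_HASH_PENALTY (64)  =>  by_list = FALSE,
+ *	so we do per-offset hash lookups instead of walking memq.
+ */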
+
+static boolean_t
+uvn_flush(uobj, start, stop, flags)
+ struct uvm_object *uobj;
+ vaddr_t start, stop;
+ int flags;
+{
+ struct uvm_vnode *uvn = (struct uvm_vnode *) uobj;
+ struct vm_page *pp, *ppnext, *ptmp;
+ struct vm_page *pps[MAXBSIZE >> PAGE_SHIFT], **ppsp;
+ int npages, result, lcv;
+ boolean_t retval, need_iosync, by_list, needs_clean;
+ vaddr_t curoff;
+ u_short pp_version;
+ UVMHIST_FUNC("uvn_flush"); UVMHIST_CALLED(maphist);
+
+ curoff = 0; /* XXX: shut up gcc */
+ /*
+ * get init vals and determine how we are going to traverse object
+ */
+
+ need_iosync = FALSE;
+ retval = TRUE; /* return value */
+ if (flags & PGO_ALLPAGES) {
+ start = 0;
+ stop = round_page(uvn->u_size);
+ by_list = TRUE; /* always go by the list */
+ } else {
+ start = trunc_page(start);
+ stop = round_page(stop);
+ if (stop > round_page(uvn->u_size))
+ printf("uvn_flush: strange, got an out of range "
+ "flush (fixed)\n");
+
+ by_list = (uobj->uo_npages <=
+ ((stop - start) >> PAGE_SHIFT) * UVN_HASH_PENALTY);
+ }
+
+ UVMHIST_LOG(maphist,
+ " flush start=0x%x, stop=0x%x, by_list=%d, flags=0x%x",
+ start, stop, by_list, flags);
+
+ /*
+ * PG_CLEANCHK: this bit is used by the pgo_mk_pcluster function as
+ * a _hint_ as to how up to date the PG_CLEAN bit is. if the hint
+ * is wrong it will only prevent us from clustering... it won't break
+ * anything. we clear all PG_CLEANCHK bits here, and pgo_mk_pcluster
+ * will set them as it syncs PG_CLEAN. This is only an issue if we
+ * are looking at non-inactive pages (because inactive page's PG_CLEAN
+ * bit is always up to date since there are no mappings).
+ * [borrowed PG_CLEANCHK idea from FreeBSD VM]
+ */
+
+ if ((flags & PGO_CLEANIT) != 0 &&
+ uobj->pgops->pgo_mk_pcluster != NULL) {
+ if (by_list) {
+ for (pp = uobj->memq.tqh_first ; pp != NULL ;
+ pp = pp->listq.tqe_next) {
+ if (pp->offset < start || pp->offset >= stop)
+ continue;
+ pp->flags &= ~PG_CLEANCHK;
+ }
+
+ } else { /* by hash */
+ for (curoff = start ; curoff < stop;
+ curoff += PAGE_SIZE) {
+ pp = uvm_pagelookup(uobj, curoff);
+ if (pp)
+ pp->flags &= ~PG_CLEANCHK;
+ }
+ }
+ }
+
+ /*
+ * now do it. note: we must update ppnext in body of loop or we
+ * will get stuck. we need to use ppnext because we may free "pp"
+ * before doing the next loop.
+ */
+
+ if (by_list) {
+ pp = uobj->memq.tqh_first;
+ } else {
+ curoff = start;
+ pp = uvm_pagelookup(uobj, curoff);
+ }
+
+ ppnext = NULL; /* XXX: shut up gcc */
+ ppsp = NULL; /* XXX: shut up gcc */
+ uvm_lock_pageq(); /* page queues locked */
+
+ /* locked: both page queues and uobj */
+ for ( ; (by_list && pp != NULL) ||
+ (!by_list && curoff < stop) ; pp = ppnext) {
+
+ if (by_list) {
+
+ /*
+ * range check
+ */
+
+ if (pp->offset < start || pp->offset >= stop) {
+ ppnext = pp->listq.tqe_next;
+ continue;
+ }
+
+ } else {
+
+ /*
+ * null check
+ */
+
+ curoff += PAGE_SIZE;
+ if (pp == NULL) {
+ if (curoff < stop)
+ ppnext = uvm_pagelookup(uobj, curoff);
+ continue;
+ }
+
+ }
+
+ /*
+ * handle the case where we do not need to clean the page (either
+ * because we are not cleaning (PGO_CLEANIT not set) or because
+ * the page is not dirty or is busy):
+ *
+ * NOTE: we are allowed to deactivate a non-wired active
+ * PG_BUSY page, but once a PG_BUSY page is on the inactive
+ * queue it must stay put until it is !PG_BUSY (so as not to
+ * confuse pagedaemon).
+ */
+
+ if ((flags & PGO_CLEANIT) == 0 || (pp->flags & PG_BUSY) != 0) {
+ needs_clean = FALSE;
+ if ((pp->flags & PG_BUSY) != 0 &&
+ (flags & (PGO_CLEANIT|PGO_SYNCIO)) ==
+ (PGO_CLEANIT|PGO_SYNCIO))
+ need_iosync = TRUE;
+ } else {
+ /*
+ * freeing: nuke all mappings so we can sync
+ * PG_CLEAN bit with no race
+ */
+ if ((pp->flags & PG_CLEAN) != 0 &&
+ (flags & PGO_FREE) != 0 &&
+ (pp->pqflags & PQ_ACTIVE) != 0)
+ pmap_page_protect(PMAP_PGARG(pp), VM_PROT_NONE);
+ if ((pp->flags & PG_CLEAN) != 0 &&
+ pmap_is_modified(PMAP_PGARG(pp)))
+ pp->flags &= ~(PG_CLEAN);
+ pp->flags |= PG_CLEANCHK; /* update "hint" */
+
+ needs_clean = ((pp->flags & PG_CLEAN) == 0);
+ }
+
+ /*
+ * if we don't need a clean... load ppnext and dispose of pp
+ */
+ if (!needs_clean) {
+ /* load ppnext */
+ if (by_list)
+ ppnext = pp->listq.tqe_next;
+ else {
+ if (curoff < stop)
+ ppnext = uvm_pagelookup(uobj, curoff);
+ }
+
+ /* now dispose of pp */
+ if (flags & PGO_DEACTIVATE) {
+ if ((pp->pqflags & PQ_INACTIVE) == 0 &&
+ pp->wire_count == 0) {
+ pmap_page_protect(PMAP_PGARG(pp),
+ VM_PROT_NONE);
+ uvm_pagedeactivate(pp);
+ }
+
+ } else if (flags & PGO_FREE) {
+ if (pp->flags & PG_BUSY) {
+ /* release busy pages */
+ pp->flags |= PG_RELEASED;
+ } else {
+ pmap_page_protect(PMAP_PGARG(pp),
+ VM_PROT_NONE);
+ /* removed page from object */
+ uvm_pagefree(pp);
+ }
+ }
+ /* ppnext is valid so we can continue... */
+ continue;
+ }
+
+ /*
+ * pp points to a page in the locked object that we are
+ * working on. if it is !PG_CLEAN,!PG_BUSY and we asked
+ * for cleaning (PGO_CLEANIT). we clean it now.
+ *
+ * let uvm_pager_put attempt a clustered pageout.
+ * note: locked: uobj and page queues.
+ */
+
+ pp->flags |= PG_BUSY; /* we 'own' page now */
+ UVM_PAGE_OWN(pp, "uvn_flush");
+ pmap_page_protect(PMAP_PGARG(pp), VM_PROT_READ);
+ pp_version = pp->version;
+ReTry:
+ ppsp = pps;
+ npages = sizeof(pps) / sizeof(struct vm_page *);
+
+ /* locked: page queues, uobj */
+ result = uvm_pager_put(uobj, pp, &ppsp, &npages,
+ flags | PGO_DOACTCLUST, start, stop);
+ /* unlocked: page queues, uobj */
+
+ /*
+ * at this point nothing is locked. if we did an async I/O
+ * it is remotely possible for the async i/o to complete and
+ * the page "pp" be freed or what not before we get a chance
+ * to relock the object. in order to detect this, we have
+ * saved the version number of the page in "pp_version".
+ */
+
+ /* relock! */
+ simple_lock(&uobj->vmobjlock);
+ uvm_lock_pageq();
+
+ /*
+ * VM_PAGER_AGAIN: given the structure of this pager, this
+ * can only happen when we are doing async I/O and can't
+ * map the pages into kernel memory (pager_map) due to lack
+ * of vm space. if this happens we drop back to sync I/O.
+ */
+
+ if (result == VM_PAGER_AGAIN) {
+ /*
+ * it is unlikely, but page could have been released
+ * while we had the object lock dropped. we ignore
+ * this now and retry the I/O. we will detect and
+ * handle the released page after the syncio I/O
+ * completes.
+ */
+#ifdef DIAGNOSTIC
+ if (flags & PGO_SYNCIO)
+ panic("uvn_flush: PGO_SYNCIO return 'try again' error (impossible)");
+#endif
+ flags |= PGO_SYNCIO;
+ goto ReTry;
+ }
+
+ /*
+ * the cleaning operation is now done. finish up. note that
+ * on error (!OK, !PEND) uvm_pager_put drops the cluster for us.
+ * if success (OK, PEND) then uvm_pager_put returns the cluster
+ * to us in ppsp/npages.
+ */
+
+ /*
+ * for pending async i/o if we are not deactivating/freeing
+ * we can move on to the next page.
+ */
+
+ if (result == VM_PAGER_PEND) {
+
+ if ((flags & (PGO_DEACTIVATE|PGO_FREE)) == 0) {
+ /*
+ * no per-page ops: refresh ppnext and continue
+ */
+ if (by_list) {
+ if (pp->version == pp_version)
+ ppnext = pp->listq.tqe_next;
+ else
+ /* reset */
+ ppnext = uobj->memq.tqh_first;
+ } else {
+ if (curoff < stop)
+ ppnext = uvm_pagelookup(uobj,
+ curoff);
+ }
+ continue;
+ }
+
+ /* need to do anything here? */
+ }
+
+ /*
+ * need to look at each page of the I/O operation. we defer
+ * processing "pp" until the last trip through this "for" loop
+ * so that we can load "ppnext" for the main loop after we
+ * play with the cluster pages [thus the "npages + 1" in the
+ * loop below].
+ */
+
+ for (lcv = 0 ; lcv < npages + 1 ; lcv++) {
+
+ /*
+ * handle ppnext for outside loop, and saving pp
+ * until the end.
+ */
+ if (lcv < npages) {
+ if (ppsp[lcv] == pp)
+ continue; /* skip pp until the end */
+ ptmp = ppsp[lcv];
+ } else {
+ ptmp = pp;
+
+ /* set up next page for outer loop */
+ if (by_list) {
+ if (pp->version == pp_version)
+ ppnext = pp->listq.tqe_next;
+ else
+ /* reset */
+ ppnext = uobj->memq.tqh_first;
+ } else {
+ if (curoff < stop)
+ ppnext = uvm_pagelookup(uobj, curoff);
+ }
+ }
+
+ /*
+ * verify the page didn't get moved while obj was
+ * unlocked
+ */
+ if (result == VM_PAGER_PEND && ptmp->uobject != uobj)
+ continue;
+
+ /*
+ * unbusy the page if I/O is done. note that for
+ * pending I/O it is possible that the I/O op
+ * finished before we relocked the object (in
+ * which case the page is no longer busy).
+ */
+
+ if (result != VM_PAGER_PEND) {
+ if (ptmp->flags & PG_WANTED)
+ /* still holding object lock */
+ thread_wakeup(ptmp);
+
+ ptmp->flags &= ~(PG_WANTED|PG_BUSY);
+ UVM_PAGE_OWN(ptmp, NULL);
+ if (ptmp->flags & PG_RELEASED) {
+
+ /* pgo_releasepg wants this */
+ uvm_unlock_pageq();
+ if (!uvn_releasepg(ptmp, NULL))
+ return (TRUE);
+
+ uvm_lock_pageq(); /* relock */
+ continue; /* next page */
+
+ } else {
+ ptmp->flags |= (PG_CLEAN|PG_CLEANCHK);
+ if ((flags & PGO_FREE) == 0)
+ pmap_clear_modify(
+ PMAP_PGARG(ptmp));
+ }
+ }
+
+ /*
+ * dispose of page
+ */
+
+ if (flags & PGO_DEACTIVATE) {
+ if ((ptmp->pqflags & PQ_INACTIVE) == 0 &&
+ ptmp->wire_count == 0) {
+ pmap_page_protect(PMAP_PGARG(ptmp),
+ VM_PROT_NONE);
+ uvm_pagedeactivate(ptmp);
+ }
+
+ } else if (flags & PGO_FREE) {
+ if (result == VM_PAGER_PEND) {
+ if ((ptmp->flags & PG_BUSY) != 0)
+ /* signal for i/o done */
+ ptmp->flags |= PG_RELEASED;
+ } else {
+ if (result != VM_PAGER_OK) {
+ printf("uvn_flush: obj=%p, "
+ "offset=0x%lx. error "
+ "during pageout.\n",
+ pp->uobject, pp->offset);
+ printf("uvn_flush: WARNING: "
+ "changes to page may be "
+ "lost!\n");
+ retval = FALSE;
+ }
+ pmap_page_protect(PMAP_PGARG(ptmp),
+ VM_PROT_NONE);
+ uvm_pagefree(ptmp);
+ }
+ }
+
+ } /* end of "lcv" for loop */
+
+ } /* end of "pp" for loop */
+
+ /*
+ * done with pagequeues: unlock
+ */
+ uvm_unlock_pageq();
+
+ /*
+ * now wait for all I/O if required.
+ */
+ if (need_iosync) {
+
+ UVMHIST_LOG(maphist," <<DOING IOSYNC>>",0,0,0,0);
+ while (uvn->u_nio != 0) {
+ uvn->u_flags |= UVM_VNODE_IOSYNC;
+ UVM_UNLOCK_AND_WAIT(&uvn->u_nio, &uvn->u_obj.vmobjlock,
+ FALSE, "uvn_flush",0);
+ simple_lock(&uvn->u_obj.vmobjlock);
+ }
+ if (uvn->u_flags & UVM_VNODE_IOSYNCWANTED)
+ wakeup(&uvn->u_flags);
+ uvn->u_flags &= ~(UVM_VNODE_IOSYNC|UVM_VNODE_IOSYNCWANTED);
+ }
+
+ /* return, with object locked! */
+ UVMHIST_LOG(maphist,"<- done (retval=0x%x)",retval,0,0,0);
+ return(retval);
+}
+
+/*
+ * uvn_cluster
+ *
+ * we are about to do I/O in an object at offset. this function is called
+ * to establish a range of offsets around "offset" in which we can cluster
+ * I/O.
+ *
+ * - currently doesn't matter if obj locked or not.
+ */
+
+static void
+uvn_cluster(uobj, offset, loffset, hoffset)
+ struct uvm_object *uobj;
+ vaddr_t offset;
+ vaddr_t *loffset, *hoffset; /* OUT */
+{
+ struct uvm_vnode *uvn = (struct uvm_vnode *) uobj;
+ *loffset = offset;
+
+ if (*loffset >= uvn->u_size)
+ panic("uvn_cluster: offset out of range");
+
+ /*
+ * XXX: old pager claims we could use VOP_BMAP to get maxcontig value.
+ */
+ *hoffset = *loffset + MAXBSIZE;
+ if (*hoffset > round_page(uvn->u_size)) /* past end? */
+ *hoffset = round_page(uvn->u_size);
+
+ return;
+}
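+
+/*
+ * worked example of the clustering window above (hypothetical sizes:
+ * MAXBSIZE = 64k, u_size = 0x11000):
+ *
+ *	offset = 0x8000  ->  [0x8000, 0x11000)  window clamped at EOF
+ *	offset = 0x0     ->  [0x0, 0x10000)     full 64k window
+ */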
+
+/*
+ * uvn_put: flush page data to backing store.
+ *
+ * => prefer map unlocked (not required)
+ * => object must be locked! we will _unlock_ it before starting I/O.
+ * => flags: PGO_SYNCIO -- use sync. I/O
+ * => note: caller must set PG_CLEAN and pmap_clear_modify (if needed)
+ * => XXX: currently we use VOP_READ/VOP_WRITE which are only sync.
+ * [thus we never do async i/o! see iodone comment]
+ */
+
+static int
+uvn_put(uobj, pps, npages, flags)
+ struct uvm_object *uobj;
+ struct vm_page **pps;
+ int npages, flags;
+{
+ int retval;
+
+ /* note: object locked */
+ retval = uvn_io((struct uvm_vnode*)uobj, pps, npages, flags, UIO_WRITE);
+ /* note: object unlocked */
+
+ return(retval);
+}
+
+
+/*
+ * uvn_get: get pages (synchronously) from backing store
+ *
+ * => prefer map unlocked (not required)
+ * => object must be locked! we will _unlock_ it before starting any I/O.
+ * => flags: PGO_ALLPAGES: get all of the pages
+ * PGO_LOCKED: fault data structures are locked
+ * => NOTE: offset is the offset of pps[0], _NOT_ pps[centeridx]
+ * => NOTE: caller must check for released pages!!
+ */
+
+static int
+uvn_get(uobj, offset, pps, npagesp, centeridx, access_type, advice, flags)
+ struct uvm_object *uobj;
+ vaddr_t offset;
+ struct vm_page **pps; /* IN/OUT */
+ int *npagesp; /* IN (OUT if PGO_LOCKED) */
+ int centeridx, advice, flags;
+ vm_prot_t access_type;
+{
+ vaddr_t current_offset;
+ struct vm_page *ptmp;
+ int lcv, result, gotpages;
+ boolean_t done;
+ UVMHIST_FUNC("uvn_get"); UVMHIST_CALLED(maphist);
+ UVMHIST_LOG(maphist, "flags=%d", flags,0,0,0);
+
+ /*
+ * step 1: handle the case where fault data structures are locked.
+ */
+
+ if (flags & PGO_LOCKED) {
+
+ /*
+ * gotpages is the current number of pages we've gotten (which
+ * we pass back up to the caller via *npagesp).
+ */
+
+ gotpages = 0;
+
+ /*
+ * step 1a: get pages that are already resident. only do this
+ * if the data structures are locked (i.e. the first time
+ * through).
+ */
+
+ done = TRUE; /* be optimistic */
+
+ for (lcv = 0, current_offset = offset ; lcv < *npagesp ;
+ lcv++, current_offset += PAGE_SIZE) {
+
+ /* do we care about this page? if not, skip it */
+ if (pps[lcv] == PGO_DONTCARE)
+ continue;
+
+ /* lookup page */
+ ptmp = uvm_pagelookup(uobj, current_offset);
+
+ /* to be useful must get a non-busy, non-released pg */
+ if (ptmp == NULL ||
+ (ptmp->flags & (PG_BUSY|PG_RELEASED)) != 0) {
+ if (lcv == centeridx || (flags & PGO_ALLPAGES)
+ != 0)
+ done = FALSE; /* need to do a wait or I/O! */
+ continue;
+ }
+
+ /*
+ * useful page: busy/lock it and plug it in our
+ * result array
+ */
+ ptmp->flags |= PG_BUSY; /* loan up to caller */
+ UVM_PAGE_OWN(ptmp, "uvn_get1");
+ pps[lcv] = ptmp;
+ gotpages++;
+
+ } /* "for" lcv loop */
+
+ /*
+ * XXX: given the "advice", should we consider async read-ahead?
+ * XXX: fault currently deactivates the pages behind us. is
+ * this good (other callers might not)?
+ */
+ /*
+ * XXX: read-ahead currently handled by buffer cache (bread)
+ * level.
+ * XXX: no async i/o available.
+ * XXX: so we don't do anything now.
+ */
+
+ /*
+ * step 1c: now we've either done everything needed or we need to
+ * unlock and do some waiting or I/O.
+ */
+
+ *npagesp = gotpages; /* let caller know */
+ if (done)
+ return(VM_PAGER_OK); /* bingo! */
+ else
+ /* EEK! Need to unlock and I/O */
+ return(VM_PAGER_UNLOCK);
+ }
+
+ /*
+ * step 2: get non-resident or busy pages.
+ * object is locked. data structures are unlocked.
+ *
+ * XXX: because we can't do async I/O at this level we get things
+ * page at a time (otherwise we'd chunk). the VOP_READ() will do
+ * async-read-ahead for us at a lower level.
+ */
+
+ for (lcv = 0, current_offset = offset ;
+ lcv < *npagesp ; lcv++, current_offset += PAGE_SIZE) {
+
+ /* skip over pages we've already gotten or don't want */
+ /* skip over pages we don't _have_ to get */
+ if (pps[lcv] != NULL || (lcv != centeridx &&
+ (flags & PGO_ALLPAGES) == 0))
+ continue;
+
+ /*
+ * we have yet to locate the current page (pps[lcv]). we first
+ * look for a page that is already at the current offset. if
+ * we find a page, we check to see if it is busy or released.
+ * if that is the case, then we sleep on the page until it is
+ * no longer busy or released and repeat the lookup. if the
+ * page we found is neither busy nor released, then we busy it
+ * (so we own it) and plug it into pps[lcv]. this breaks the
+ * following while loop and indicates we are ready to move on
+ * to the next page in the "lcv" loop above.
+ *
+ * if we exit the while loop with pps[lcv] still set to NULL,
+ * then it means that we allocated a new busy/fake/clean page
+ * ptmp in the object and we need to do I/O to fill in the data.
+ */
+
+ while (pps[lcv] == NULL) { /* top of "pps" while loop */
+
+ /* look for a current page */
+ ptmp = uvm_pagelookup(uobj, current_offset);
+
+ /* nope? allocate one now (if we can) */
+ if (ptmp == NULL) {
+
+ ptmp = uvm_pagealloc(uobj, current_offset,
+ NULL); /* alloc */
+
+ /* out of RAM? */
+ if (ptmp == NULL) {
+ simple_unlock(&uobj->vmobjlock);
+ uvm_wait("uvn_getpage");
+ simple_lock(&uobj->vmobjlock);
+
+ /* goto top of pps while loop */
+ continue;
+ }
+
+ /*
+ * got new page ready for I/O. break pps
+ * while loop. pps[lcv] is still NULL.
+ */
+ break;
+ }
+
+ /* page is there, see if we need to wait on it */
+ if ((ptmp->flags & (PG_BUSY|PG_RELEASED)) != 0) {
+ ptmp->flags |= PG_WANTED;
+ UVM_UNLOCK_AND_WAIT(ptmp,
+ &uobj->vmobjlock, 0, "uvn_get",0);
+ simple_lock(&uobj->vmobjlock);
+ continue; /* goto top of pps while loop */
+ }
+
+ /*
+ * if we get here then the page has become resident
+ * and unbusy between steps 1 and 2. we busy it
+ * now (so we own it) and set pps[lcv] (so that we
+ * exit the while loop).
+ */
+ ptmp->flags |= PG_BUSY;
+ UVM_PAGE_OWN(ptmp, "uvn_get2");
+ pps[lcv] = ptmp;
+ }
+
+ /*
+ * if we own a valid page at the correct offset, pps[lcv]
+ * will point to it. nothing more to do except go to the
+ * next page.
+ */
+
+ if (pps[lcv])
+ continue; /* next lcv */
+
+ /*
+ * we have a "fake/busy/clean" page that we just allocated. do
+ * I/O to fill it with valid data. note that object must be
+ * locked going into uvn_io, but will be unlocked afterwards.
+ */
+
+ result = uvn_io((struct uvm_vnode *) uobj, &ptmp, 1,
+ PGO_SYNCIO, UIO_READ);
+
+ /*
+ * I/O done. object is unlocked (by uvn_io). because we used
+ * syncio the result can not be PEND or AGAIN. we must relock
+ * and check for errors.
+ */
+
+ /* lock object. check for errors. */
+ simple_lock(&uobj->vmobjlock);
+ if (result != VM_PAGER_OK) {
+ if (ptmp->flags & PG_WANTED)
+ /* object lock still held */
+ thread_wakeup(ptmp);
+
+ ptmp->flags &= ~(PG_WANTED|PG_BUSY);
+ UVM_PAGE_OWN(ptmp, NULL);
+ uvm_lock_pageq();
+ uvm_pagefree(ptmp);
+ uvm_unlock_pageq();
+ simple_unlock(&uobj->vmobjlock);
+ return(result);
+ }
+
+ /*
+ * we got the page! clear the fake flag (indicates valid
+ * data now in page) and plug into our result array. note
+ * that page is still busy.
+ *
+ * it is the callers job to:
+ * => check if the page is released
+ * => unbusy the page
+ * => activate the page
+ */
+
+ ptmp->flags &= ~PG_FAKE; /* data is valid ... */
+ pmap_clear_modify(PMAP_PGARG(ptmp)); /* ... and clean */
+ pps[lcv] = ptmp;
+
+ } /* lcv loop */
+
+ /*
+ * finally, unlock object and return.
+ */
+
+ simple_unlock(&uobj->vmobjlock);
+ return (VM_PAGER_OK);
+}
+
+/*
+ * uvn_asyncget: start async I/O to bring pages into ram
+ *
+ * => caller must lock object(???XXX: see if this is best)
+ * => could be called from uvn_get or a madvise() fault-ahead.
+ * => if it fails, it doesn't matter.
+ */
+
+static int
+uvn_asyncget(uobj, offset, npages)
+ struct uvm_object *uobj;
+ vaddr_t offset;
+ int npages;
+{
+
+ /*
+ * XXXCDC: we can't do async I/O yet
+ */
+ printf("uvn_asyncget called\n");
+ return (KERN_SUCCESS);
+}
+
+/*
+ * uvn_io: do I/O to a vnode
+ *
+ * => prefer map unlocked (not required)
+ * => object must be locked! we will _unlock_ it before starting I/O.
+ * => flags: PGO_SYNCIO -- use sync. I/O
+ * => XXX: currently we use VOP_READ/VOP_WRITE which are only sync.
+ * [thus we never do async i/o! see iodone comment]
+ */
+
+static int
+uvn_io(uvn, pps, npages, flags, rw)
+ struct uvm_vnode *uvn;
+ vm_page_t *pps;
+ int npages, flags, rw;
+{
+ struct vnode *vn;
+ struct uio uio;
+ struct iovec iov;
+ vaddr_t kva, file_offset;
+ int waitf, result, got, wanted;
+ UVMHIST_FUNC("uvn_io"); UVMHIST_CALLED(maphist);
+
+ UVMHIST_LOG(maphist, "rw=%d", rw,0,0,0);
+
+ /*
+ * init values
+ */
+
+ waitf = (flags & PGO_SYNCIO) ? M_WAITOK : M_NOWAIT;
+ vn = (struct vnode *) uvn;
+ file_offset = pps[0]->offset;
+
+ /*
+ * check for sync'ing I/O.
+ */
+
+ while (uvn->u_flags & UVM_VNODE_IOSYNC) {
+ if (waitf == M_NOWAIT) {
+ simple_unlock(&uvn->u_obj.vmobjlock);
+ UVMHIST_LOG(maphist,"<- try again (iosync)",0,0,0,0);
+ return(VM_PAGER_AGAIN);
+ }
+ uvn->u_flags |= UVM_VNODE_IOSYNCWANTED;
+ UVM_UNLOCK_AND_WAIT(&uvn->u_flags, &uvn->u_obj.vmobjlock,
+ FALSE, "uvn_iosync",0);
+ simple_lock(&uvn->u_obj.vmobjlock);
+ }
+
+ /*
+ * check size
+ */
+
+ if (file_offset >= uvn->u_size) {
+ simple_unlock(&uvn->u_obj.vmobjlock);
+ UVMHIST_LOG(maphist,"<- BAD (size check)",0,0,0,0);
+#ifdef DIAGNOSTIC
+ printf("uvn_io: note: size check fired\n");
+#endif
+ return(VM_PAGER_BAD);
+ }
+
+ /*
+ * first try and map the pages in (without waiting)
+ */
+
+ kva = uvm_pagermapin(pps, npages, NULL, M_NOWAIT);
+ if (kva == NULL && waitf == M_NOWAIT) {
+ simple_unlock(&uvn->u_obj.vmobjlock);
+ UVMHIST_LOG(maphist,"<- mapin failed (try again)",0,0,0,0);
+ return(VM_PAGER_AGAIN);
+ }
+
+ /*
+ * ok, now bump u_nio up. at this point we are done with uvn
+ * and can unlock it. if we still don't have a kva, try again
+ * (this time with sleep ok).
+ */
+
+ uvn->u_nio++; /* we have an I/O in progress! */
+ simple_unlock(&uvn->u_obj.vmobjlock);
+ /* NOTE: object now unlocked */
+ if (kva == NULL) {
+ kva = uvm_pagermapin(pps, npages, NULL, M_WAITOK);
+ }
+
+ /*
+ * ok, mapped in. our pages are PG_BUSY so they are not going to
+ * get touched (so we can look at "offset" without having to lock
+ * the object). set up for I/O.
+ */
+
+ /*
+ * fill out uio/iov
+ */
+
+ iov.iov_base = (caddr_t) kva;
+ wanted = npages << PAGE_SHIFT;
+ if (file_offset + wanted > uvn->u_size)
+ wanted = uvn->u_size - file_offset; /* XXX: needed? */
+ iov.iov_len = wanted;
+ uio.uio_iov = &iov;
+ uio.uio_iovcnt = 1;
+ uio.uio_offset = file_offset;
+ uio.uio_segflg = UIO_SYSSPACE;
+ uio.uio_rw = rw;
+ uio.uio_resid = wanted;
+ uio.uio_procp = NULL;
+
+ /*
+ * do the I/O! (XXX: curproc?)
+ */
+
+ UVMHIST_LOG(maphist, "calling VOP",0,0,0,0);
+
+ if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
+ vn_lock(vn, LK_EXCLUSIVE | LK_RETRY, curproc /*XXX*/);
+ /* NOTE: vnode now locked! */
+
+ if (rw == UIO_READ)
+ result = VOP_READ(vn, &uio, 0, curproc->p_ucred);
+ else
+ result = VOP_WRITE(vn, &uio, 0, curproc->p_ucred);
+
+ if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
+ VOP_UNLOCK(vn, 0, curproc /*XXX*/);
+ /* NOTE: vnode now unlocked (unless vnislocked) */
+
+ UVMHIST_LOG(maphist, "done calling VOP",0,0,0,0);
+
+ /*
+ * result == unix style errno (0 == OK!)
+ *
+ * zero out rest of buffer (if needed)
+ */
+
+ if (result == 0) {
+ got = wanted - uio.uio_resid;
+
+ if (wanted && got == 0) {
+ result = EIO; /* XXX: error? */
+ } else if (got < PAGE_SIZE * npages && rw == UIO_READ) {
+ bzero((void *) (kva + got),
+ (npages << PAGE_SHIFT) - got);
+ }
+ }
+
+ /*
+ * now remove pager mapping
+ */
+ uvm_pagermapout(kva, npages);
+
+ /*
+ * now clean up the object (i.e. drop I/O count)
+ */
+
+ simple_lock(&uvn->u_obj.vmobjlock);
+ /* NOTE: object now locked! */
+
+ uvn->u_nio--; /* I/O DONE! */
+ if ((uvn->u_flags & UVM_VNODE_IOSYNC) != 0 && uvn->u_nio == 0) {
+ wakeup(&uvn->u_nio);
+ }
+ simple_unlock(&uvn->u_obj.vmobjlock);
+ /* NOTE: object now unlocked! */
+
+ /*
+ * done!
+ */
+
+ UVMHIST_LOG(maphist, "<- done (result %d)", result,0,0,0);
+ if (result == 0)
+ return(VM_PAGER_OK);
+ else
+ return(VM_PAGER_ERROR);
+}
+
+/*
+ * uvm_vnp_uncache: disable "persisting" in a vnode... when last reference
+ * is gone we will kill the object (flushing dirty pages back to the vnode
+ * if needed).
+ *
+ * => returns TRUE if there was no uvm_object attached or if there was
+ * one and we killed it [i.e. if there is no active uvn]
+ * => called with the vnode VOP_LOCK'd [we will unlock it for I/O, if
+ * needed]
+ *
+ * => XXX: given that we now kill uvn's when a vnode is recycled (without
+ * having to hold a reference on the vnode) and given a working
+ * uvm_vnp_sync(), how does that affect the need for this function?
+ * [XXXCDC: seems like it can die?]
+ *
+ * => XXX: this function should DIE once we merge the VM and buffer
+ * cache.
+ *
+ * research shows that this is called in the following places:
+ * ext2fs_truncate, ffs_truncate, detrunc[msdosfs]: called when vnode
+ * changes sizes
+ * ext2fs_write, WRITE [ufs_readwrite], msdosfs_write: called when we
+ * are written to
+ * ext2fs_chmod, ufs_chmod: called if VTEXT vnode and the sticky bit
+ * is off
+ * ffs_realloccg: when we can't extend the current block and have
+ * to allocate a new one we call this [XXX: why?]
+ * nfsrv_rename, rename_files: called when the target filename is there
+ * and we want to remove it
+ * nfsrv_remove, sys_unlink: called on file we are removing
+ * nfsrv_access: if VTEXT and we want WRITE access and we don't uncache
+ * then return "text busy"
+ * nfs_open: seems to uncache any file opened with nfs
+ * vn_writechk: if VTEXT vnode and can't uncache return "text busy"
+ */
+
+boolean_t
+uvm_vnp_uncache(vp)
+ struct vnode *vp;
+{
+ struct uvm_vnode *uvn = &vp->v_uvm;
+
+ /*
+ * lock uvn part of the vnode and check to see if we need to do anything
+ */
+
+ simple_lock(&uvn->u_obj.vmobjlock);
+ if ((uvn->u_flags & UVM_VNODE_VALID) == 0 ||
+ (uvn->u_flags & UVM_VNODE_BLOCKED) != 0) {
+ simple_unlock(&uvn->u_obj.vmobjlock);
+ return(TRUE);
+ }
+
+ /*
+ * we have a valid, non-blocked uvn. clear persist flag.
+ * if uvn is currently active we can return now.
+ */
+
+ uvn->u_flags &= ~UVM_VNODE_CANPERSIST;
+ if (uvn->u_obj.uo_refs) {
+ simple_unlock(&uvn->u_obj.vmobjlock);
+ return(FALSE);
+ }
+
+ /*
+ * uvn is currently persisting! we have to gain a reference to
+ * it so that we can call uvn_detach to kill the uvn.
+ */
+
+ VREF(vp); /* seems ok, even with VOP_LOCK */
+ uvn->u_obj.uo_refs++; /* value is now 1 */
+ simple_unlock(&uvn->u_obj.vmobjlock);
+
+
+#ifdef DEBUG
+ /*
+ * carry over sanity check from old vnode pager: the vnode should
+ * be VOP_LOCK'd, and we confirm it here.
+ */
+ if (!VOP_ISLOCKED(vp)) {
+ boolean_t is_ok_anyway = FALSE;
+#ifdef NFS
+ extern int (**nfsv2_vnodeop_p) __P((void *));
+ extern int (**spec_nfsv2nodeop_p) __P((void *));
+ extern int (**fifo_nfsv2nodeop_p) __P((void *));
+
+ /* vnode is NOT VOP_LOCKed: some vnode types _never_ lock */
+ if (vp->v_op == nfsv2_vnodeop_p ||
+ vp->v_op == spec_nfsv2nodeop_p) {
+ is_ok_anyway = TRUE;
+ }
+ if (vp->v_op == fifo_nfsv2nodeop_p) {
+ is_ok_anyway = TRUE;
+ }
+#endif /* NFS */
+ if (!is_ok_anyway)
+ panic("uvm_vnp_uncache: vnode not locked!");
+ }
+#endif /* DEBUG */
+
+ /*
+ * now drop our reference to the vnode. if we have the sole
+ * reference to the vnode then this will cause it to die [as we
+ * just cleared the persist flag]. we have to unlock the vnode
+ * while we are doing this as it may trigger I/O.
+ *
+ * XXX: it might be possible for uvn to get reclaimed while we are
+ * unlocked, causing us to return TRUE when we should not. we ignore
+ * this, as a false-positive return value doesn't hurt us.
+ */
+ VOP_UNLOCK(vp, 0, curproc /*XXX*/);
+ uvn_detach(&uvn->u_obj);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curproc/*XXX*/);
+
+ /*
+ * and return...
+ */
+
+ return(TRUE);
+}
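
A hedged sketch of a caller in the style of the vn_writechk/nfsrv_access cases listed in the comment above: refuse write access to a VTEXT vnode whose cached pages cannot be flushed. write_check() is a made-up name; the snippet assumes normal kernel context (sys/param.h, sys/vnode.h, sys/errno.h) and that vp is VOP_LOCK'd, as uvm_vnp_uncache() requires.

/* hypothetical helper, not a kernel routine */
int
write_check(struct vnode *vp)
{

	/* executable image still cached and uncache refused: "text busy" */
	if ((vp->v_flag & VTEXT) && !uvm_vnp_uncache(vp))
		return (ETXTBSY);
	return (0);
}
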
+
+/*
+ * uvm_vnp_setsize: grow or shrink a vnode uvn
+ *
+ * grow => just update size value
+ * shrink => toss un-needed pages
+ *
+ * => we assume that the caller has a reference of some sort to the
+ * vnode in question so that it will not be yanked out from under
+ * us.
+ *
+ * called from:
+ * => truncate fns (ext2fs_truncate, ffs_truncate, detrunc[msdos])
+ * => "write" fns (ext2fs_write, WRITE [ufs/ufs], msdosfs_write, nfs_write)
+ * => ffs_balloc [XXX: why? doesn't WRITE handle?]
+ * => NFS: nfs_loadattrcache, nfs_getattrcache, nfs_setattr
+ * => union fs: union_newsize
+ */
+
+void
+uvm_vnp_setsize(vp, newsize)
+ struct vnode *vp;
+ u_quad_t newsize;
+{
+ struct uvm_vnode *uvn = &vp->v_uvm;
+
+ /*
+ * lock uvn and check for valid object, and if valid: do it!
+ */
+ simple_lock(&uvn->u_obj.vmobjlock);
+ if (uvn->u_flags & UVM_VNODE_VALID) {
+
+ /*
+ * make sure that the newsize fits within a vaddr_t
+ * XXX: need to revise addressing data types
+ */
+
+ if (newsize > (vaddr_t) -PAGE_SIZE) {
+#ifdef DEBUG
+ printf("uvm_vnp_setsize: vn %p size truncated "
+ "%qx->%lx\n", vp, newsize, (vaddr_t)-PAGE_SIZE);
+#endif
+ newsize = (vaddr_t)-PAGE_SIZE;
+ }
+
+ /*
+ * now check if the size has changed: if we shrink we had better
+ * toss some pages...
+ */
+
+ if (uvn->u_size > newsize) {
+ (void)uvn_flush(&uvn->u_obj, (vaddr_t) newsize,
+ uvn->u_size, PGO_FREE);
+ }
+ uvn->u_size = (vaddr_t)newsize;
+ }
+ simple_unlock(&uvn->u_obj.vmobjlock);
+
+ /*
+ * done
+ */
+ return;
+}
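
A sketch, under assumed kernel context, of how one of the callers listed above (a truncate or write path) would report a size change; fs_update_size() and newlen are invented names.

/* hypothetical wrapper; not a real filesystem routine */
void
fs_update_size(struct vnode *vp, u_quad_t newlen)
{

	/* ...the filesystem updates its own notion of the size first... */
	uvm_vnp_setsize(vp, newlen);	/* grow: records it; shrink: frees pages past newlen */
}
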
+
+/*
+ * uvm_vnp_sync: flush all dirty VM pages back to their backing vnodes.
+ *
+ * => called from sys_sync with no VM structures locked
+ * => only one process can do a sync at a time (because the uvn
+ * structure only has one queue for sync'ing). we ensure this
+ * by holding the uvn_sync_lock while the sync is in progress.
+ * other processes attempting a sync will sleep on this lock
+ * until we are done.
+ */
+
+void
+uvm_vnp_sync(mp)
+ struct mount *mp;
+{
+ struct uvm_vnode *uvn;
+ struct vnode *vp;
+ boolean_t got_lock;
+
+ /*
+ * step 1: ensure we are the only ones using the uvn_sync_q by locking
+ * our lock...
+ */
+ lockmgr(&uvn_sync_lock, LK_EXCLUSIVE, (void *)0, curproc /*XXX*/);
+
+ /*
+ * step 2: build up a simpleq of uvns of interest based on the
+ * write list, gaining a reference to each one. we must be
+ * careful about locking uvns since we will be holding uvn_wl_lock
+ * in the body of the loop.
+ */
+ SIMPLEQ_INIT(&uvn_sync_q);
+ simple_lock(&uvn_wl_lock);
+ for (uvn = uvn_wlist.lh_first ; uvn != NULL ;
+ uvn = uvn->u_wlist.le_next) {
+
+ vp = (struct vnode *) uvn;
+ if (mp && vp->v_mount != mp)
+ continue;
+
+ /* attempt to gain reference */
+ while ((got_lock = simple_lock_try(&uvn->u_obj.vmobjlock)) ==
+ FALSE &&
+ (uvn->u_flags & UVM_VNODE_BLOCKED) == 0)
+ /* spin */ ;
+
+ /*
+ * we will exit the loop if either of the following is true:
+ * - we got the lock [always true if NCPU == 1]
+ * - we failed to get the lock but noticed the vnode was
+ * "blocked" -- in this case the vnode must be a dying
+ * vnode, and since dying vnodes are in the process of
+ * being flushed out, we can safely skip this one
+ *
+ * we want to skip over the vnode if we did not get the lock,
+ * or if the vnode is already dying (due to the above logic).
+ *
+ * note that uvn must already be valid because we found it on
+ * the wlist (this also means it can't be ALOCK'd).
+ */
+ if (!got_lock || (uvn->u_flags & UVM_VNODE_BLOCKED) != 0) {
+ if (got_lock)
+ simple_unlock(&uvn->u_obj.vmobjlock);
+ continue; /* skip it */
+ }
+
+ /*
+ * gain reference. watch out for persisting uvns (need to
+ * regain vnode REF).
+ */
+ if (uvn->u_obj.uo_refs == 0)
+ VREF(vp);
+ uvn->u_obj.uo_refs++;
+ simple_unlock(&uvn->u_obj.vmobjlock);
+
+ /*
+ * got it!
+ */
+ SIMPLEQ_INSERT_HEAD(&uvn_sync_q, uvn, u_syncq);
+ }
+ simple_unlock(&uvn_wl_lock);
+
+ /*
+ * step 3: we now have a list of uvn's that may need cleaning.
+ * we are holding the uvn_sync_lock, but have dropped the uvn_wl_lock
+ * (so we can now safely lock uvn's again).
+ */
+
+ for (uvn = uvn_sync_q.sqh_first ; uvn ; uvn = uvn->u_syncq.sqe_next) {
+ simple_lock(&uvn->u_obj.vmobjlock);
+#ifdef DIAGNOSTIC
+ if (uvn->u_flags & UVM_VNODE_DYING) {
+ printf("uvm_vnp_sync: dying vnode on sync list\n");
+ }
+#endif
+ uvn_flush(&uvn->u_obj, 0, 0,
+ PGO_CLEANIT|PGO_ALLPAGES|PGO_DOACTCLUST);
+
+ /*
+ * if we have the only reference and we just cleaned the uvn,
+ * then we can pull it out of the UVM_VNODE_WRITEABLE state
+ * thus allowing us to avoid thinking about flushing it again
+ * on later sync ops.
+ */
+ if (uvn->u_obj.uo_refs == 1 &&
+ (uvn->u_flags & UVM_VNODE_WRITEABLE)) {
+ LIST_REMOVE(uvn, u_wlist);
+ uvn->u_flags &= ~UVM_VNODE_WRITEABLE;
+ }
+
+ simple_unlock(&uvn->u_obj.vmobjlock);
+
+ /* now drop our reference to the uvn */
+ uvn_detach(&uvn->u_obj);
+ }
+
+ /*
+ * done! release sync lock
+ */
+ lockmgr(&uvn_sync_lock, LK_RELEASE, (void *)0, curproc /*XXX*/);
+}
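
A user-space model of the two-phase shape of uvm_vnp_sync(): step 2 collects entries under the list lock while taking a reference to each, and step 3 processes the private queue with that lock dropped. All names below are invented; per-entry locking and the single-syncer lock are omitted for brevity.

/* simplified model; not UVM code */
#include <pthread.h>
#include <stddef.h>

struct node {
	int refs;
	struct node *next;	/* list linkage, protected by list_lock */
	struct node *qnext;	/* private queue linkage for the syncer */
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *list_head;

static void clean(struct node *n) { (void)n; }		/* stands in for uvn_flush() */
static void release(struct node *n) { n->refs--; }	/* stands in for uvn_detach() */

static void
sync_all(void)
{
	struct node *n, *q = NULL;

	/* phase 1: walk the shared list under its lock, referencing each node */
	pthread_mutex_lock(&list_lock);
	for (n = list_head; n != NULL; n = n->next) {
		n->refs++;		/* keeps the node alive once the lock is dropped */
		n->qnext = q;
		q = n;
	}
	pthread_mutex_unlock(&list_lock);

	/* phase 2: process the private queue with the list lock dropped */
	for (n = q; n != NULL; n = n->qnext) {
		clean(n);
		release(n);
	}
}
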
diff --git a/sys/uvm/uvm_vnode.h b/sys/uvm/uvm_vnode.h
new file mode 100644
index 00000000000..edd4f7b698a
--- /dev/null
+++ b/sys/uvm/uvm_vnode.h
@@ -0,0 +1,110 @@
+/* $NetBSD: uvm_vnode.h,v 1.6 1998/08/13 02:11:04 eeh Exp $ */
+
+/*
+ * XXXCDC: "ROUGH DRAFT" QUALITY UVM PRE-RELEASE FILE!
+ * >>>USE AT YOUR OWN RISK, WORK IS NOT FINISHED<<<
+ */
+/*
+ *
+ * Copyright (c) 1997 Charles D. Cranor and Washington University.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Charles D. Cranor and
+ * Washington University.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * from: Id: uvm_vnode.h,v 1.1.2.4 1997/10/03 21:18:24 chuck Exp
+ */
+
+#ifndef _UVM_UVM_VNODE_H_
+#define _UVM_UVM_VNODE_H_
+
+/*
+ * uvm_vnode.h
+ *
+ * vnode handle into the VM system.
+ */
+
+/*
+ * the uvm_vnode structure. put at the top of the vnode data structure.
+ * this allows:
+ * (struct vnode *) == (struct uvm_vnode *) == (struct uvm_object *)
+ */
+
+struct uvm_vnode {
+ struct uvm_object u_obj; /* the actual VM object */
+ int u_flags; /* flags */
+ int u_nio; /* number of running I/O requests */
+ vsize_t u_size; /* size of object */
+
+ /* the following entry is locked by uvn_wl_lock */
+ LIST_ENTRY(uvm_vnode) u_wlist; /* list of writeable vnode objects */
+
+ /* the following entry is locked by uvn_sync_lock */
+ SIMPLEQ_ENTRY(uvm_vnode) u_syncq; /* vnode objects due for a "sync" */
+};
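
A self-contained illustration of the overlay described in the comment above the structure: because u_obj comes first in the uvm_vnode and the uvm_vnode sits at the top of the vnode, one address serves all three views. The struct names here are simplified stand-ins, not the real kernel structures.

#include <assert.h>
#include <stddef.h>

/* simplified stand-ins, not the real structures */
struct obj { int uo_refs; };
struct uvn { struct obj u_obj; int u_flags; };		/* like struct uvm_vnode */
struct vno { struct uvn v_uvm; int v_flag; };		/* like struct vnode */

int
main(void)
{
	struct vno v;

	/* first-member placement makes all three views share one address */
	assert((void *)&v == (void *)&v.v_uvm);
	assert((void *)&v.v_uvm == (void *)&v.v_uvm.u_obj);
	assert(offsetof(struct vno, v_uvm) == 0 &&
	    offsetof(struct uvn, u_obj) == 0);
	return (0);
}
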
+
+/*
+ * u_flags values
+ */
+#define UVM_VNODE_VALID 0x001 /* we are attached to the vnode */
+#define UVM_VNODE_CANPERSIST 0x002 /* we can persist after ref == 0 */
+#define UVM_VNODE_ALOCK 0x004 /* uvn_attach is locked out */
+#define UVM_VNODE_DYING 0x008 /* final detach/terminate in
+ progress */
+#define UVM_VNODE_RELKILL 0x010 /* uvn should be killed by releasepg
+ when final i/o is done */
+#define UVM_VNODE_WANTED 0x020 /* someone is waiting for alock,
+ dying, or relkill to clear */
+#define UVM_VNODE_VNISLOCKED 0x040 /* underlying vnode struct is locked
+ (valid when DYING is true) */
+#define UVM_VNODE_IOSYNC 0x080 /* I/O sync in progress ... setter
+ sleeps on &uvn->u_nio */
+#define UVM_VNODE_IOSYNCWANTED 0x100 /* a process is waiting for the
+ i/o sync to clear so it can do
+ i/o */
+#define UVM_VNODE_WRITEABLE 0x200 /* uvn has pages that are writeable */
+
+/*
+ * UVM_VNODE_BLOCKED: any condition that should keep new processes from
+ * touching the vnode [set WANTED and sleep to wait for it to clear]
+ */
+#define UVM_VNODE_BLOCKED (UVM_VNODE_ALOCK|UVM_VNODE_DYING|UVM_VNODE_RELKILL)
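
A hedged fragment spelling out the "set WANTED and sleep" protocol the comment describes; it assumes kernel context with uvn->u_obj.vmobjlock held on entry and retaken before re-testing, and is illustrative rather than copied from any UVM function.

	/* illustrative only: wait for a blocked uvn to become usable */
	while (uvn->u_flags & UVM_VNODE_BLOCKED) {
		uvn->u_flags |= UVM_VNODE_WANTED;	/* ask to be woken */
		UVM_UNLOCK_AND_WAIT(uvn, &uvn->u_obj.vmobjlock, FALSE,
		    "uvn_blocked", 0);
		simple_lock(&uvn->u_obj.vmobjlock);	/* relock, re-test flags */
	}
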
+
+
+/*
+ * prototypes
+ */
+
+#if 0
+/*
+ * moved uvn_attach to uvm_extern.h because uvm_vnode.h is needed to
+ * include sys/vnode.h, and files that include sys/vnode.h don't know
+ * what a vm_prot_t is.
+ */
+struct uvm_object *uvn_attach __P((void *, vm_prot_t));
+#endif
+
+#endif /* _UVM_UVM_VNODE_H_ */