author		jasper <jasper@openbsd.org>	2010-12-14 20:24:25 +0000
committer	jasper <jasper@openbsd.org>	2010-12-14 20:24:25 +0000
commit		10f5ff43374bcd4127827e94c645a10eae6c9afb (patch)
tree		f7c2e340b821aeae57147dceb17cb66a3b5f67fb
parent		use the dying flag in struct usbd_bus instead of a private dying flag (diff)
"Implement fast path TLB miss handling. Walk the page table without
creating a trapframe, with exceptions disabled and using only BANK1 registers. If a valid pte is found, load it and return. Otherwise create a trapframe and proceed to the full-blown C handler." from uwe@netbsd, ok miod@ speed-ups measured by miod@ and me were between 44% and 50%...
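For orientation, here is a hedged C sketch of the lookup the new fast path
performs before falling back to tlb_exception(). The helper name
fastpath_pte is invented for illustration; curptd, __pmap_kernel, the
__PMAP_PTP_INDEX/__PMAP_PTP_OFSET macros and PG_V are the real objects the
diff below touches.

/*
 * Illustration only, not part of the commit: the two-level page table
 * walk the fast path does in assembly, restated in C.  The caller would
 * still test PG_V and load the TLB itself.
 */
static inline pt_entry_t *
fastpath_pte(vaddr_t va)			/* hypothetical helper */
{
	pt_entry_t **ptd, *ptp;

	if ((long)va < 0) {			/* sign bit set: kernel (P3) VA */
		ptd = __pmap_kernel.pm_ptp;
		va -= VM_MIN_KERNEL_ADDRESS;	/* bias for indexing, as the asm does */
	} else					/* user (P0) VA */
		ptd = curptd;			/* cached by pmap_activate() */

	ptp = ptd[__PMAP_PTP_INDEX(va)];	/* first level */
	if (ptp == NULL)
		return (NULL);			/* fall back to tlb_exception() */
	return (ptp + __PMAP_PTP_OFSET(va));	/* second level */
}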
-rw-r--r--	sys/arch/sh/sh/genassym.cf	4
-rw-r--r--	sys/arch/sh/sh/pmap.c		7
-rw-r--r--	sys/arch/sh/sh/vectors.S	362
3 files changed, 301 insertions, 72 deletions
diff --git a/sys/arch/sh/sh/genassym.cf b/sys/arch/sh/sh/genassym.cf
index ab305f6ad15..1e2505fc704 100644
--- a/sys/arch/sh/sh/genassym.cf
+++ b/sys/arch/sh/sh/genassym.cf
@@ -1,4 +1,4 @@
-# $OpenBSD: genassym.cf,v 1.5 2008/06/26 05:42:13 ray Exp $
+# $OpenBSD: genassym.cf,v 1.6 2010/12/14 20:24:25 jasper Exp $
# $NetBSD: genassym.cf,v 1.10 2005/12/11 12:19:00 christos Exp $
#-
@@ -38,6 +38,7 @@ include <sh/fpu.h>
include <sh/locore.h>
include <sh/reg.h>
include <sh/vmparam.h>
+include <sh/pte.h>
struct trapframe
define TF_SIZE sizeof(struct trapframe)
@@ -72,6 +73,7 @@ struct uvmexp UVMEXP_
member intrs
export VM_MAXUSER_ADDRESS
+export VM_MIN_KERNEL_ADDRESS
export EFAULT
export ENAMETOOLONG
diff --git a/sys/arch/sh/sh/pmap.c b/sys/arch/sh/sh/pmap.c
index c8a624edcac..3adb1cd3305 100644
--- a/sys/arch/sh/sh/pmap.c
+++ b/sys/arch/sh/sh/pmap.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: pmap.c,v 1.16 2010/12/06 20:57:17 miod Exp $ */
+/* $OpenBSD: pmap.c,v 1.17 2010/12/14 20:24:25 jasper Exp $ */
/* $NetBSD: pmap.c,v 1.55 2006/08/07 23:19:36 tsutsui Exp $ */
/*-
@@ -57,6 +57,9 @@
struct pmap __pmap_kernel;
STATIC vaddr_t __pmap_kve; /* VA of last kernel virtual */
+/* For the fast tlb miss handler */
+pt_entry_t **curptd; /* p1 va of curlwp->...->pm_ptp */
+
/* pmap pool */
STATIC struct pool __pmap_pmap_pool;
@@ -277,7 +280,9 @@ pmap_activate(struct proc *p)
pmap->pm_asid = __pmap_asid_alloc();
KDASSERT(pmap->pm_asid >=0 && pmap->pm_asid < 256);
+
sh_tlb_set_asid(pmap->pm_asid);
+ curptd = pmap->pm_ptp;
}
int
diff --git a/sys/arch/sh/sh/vectors.S b/sys/arch/sh/sh/vectors.S
index a8f366bd17f..bb11814c90f 100644
--- a/sys/arch/sh/sh/vectors.S
+++ b/sys/arch/sh/sh/vectors.S
@@ -1,4 +1,4 @@
-/* $OpenBSD: vectors.S,v 1.7 2010/07/01 07:24:26 jasper Exp $ */
+/* $OpenBSD: vectors.S,v 1.8 2010/12/14 20:24:25 jasper Exp $ */
/* $NetBSD: exception_vector.S,v 1.19 2006/08/22 21:47:57 uwe Exp $ */
/*-
@@ -31,6 +31,7 @@
#include <sh/param.h>
#include <sh/locore.h>
+#include <sh/pte.h>
#include <sh/trap.h>
#include <sh/ubcreg.h>
#include <sh/mmu_sh3.h>
@@ -82,7 +83,7 @@ NENTRY(sh_vector_generic)
cmp/hi r1, r0
bt 1f
- /* tlb_exception(curlwp, trapframe, TEA); */
+ /* tlb_exception(curproc, trapframe, TEA); */
mov.l .Lg_VPN_MASK, r1
and r1, r6 /* va = trunc_page(va) */
__EXCEPTION_UNBLOCK(r0, r1)
@@ -138,68 +139,159 @@ VECTOR_END_MARKER(sh_vector_generic_end)
/*
* LINTSTUB: Var: char sh3_vector_tlbmiss[1];
*
- * void sh3_vector_tlbmiss(void);
- * Copied to VBR+0x400. This code should be position independent
- * and maximum 512 bytes long (== 0x600 - 0x400).
+ * TLB miss vector. We run through the fast path first, checking if
+ * there's a valid mapping in curproc or kernel pmap. We do fast path
+ * with exceptions disabled, so no P3 addresses please (including no
+ * kernel stack, as we cannot wire TLB entries on sh3). We can only
+ * use BANK1 registers, and of those r6 and r7 are already taken.
+ *
+ * If we don't find a valid mapping in the fast path, we do context
+ * save and call tlb exception handler.
+ *
+ * Copied to VBR+0x400. This code should be position independent
+ * and maximum 512 bytes long (== 0x600 - 0x400).
*/
NENTRY(sh3_vector_tlbmiss)
- __EXCEPTION_ENTRY
- mov #(SH3_TEA & 0xff), r0
- mov.l @r0, r6 /* 3rd arg: va = TEA */
-
- /* if kernel stack is in P3, handle it here fast */
-#if !defined(P1_STACK)
- cmp/pz r6
- bt 6f /* userspace address, proceed to handler */
-
- /* Load kernel stack */
- mov.l .L3_VPN_MASK, r0
- and r6, r0 /* VPN */
-
- mov.l .L3_CURUPTE, r1
- mov.l @r1, r1 /* upte = &l->l_md.md_upte[0] */
- mov #UPAGES, r3 /* loop limit */
-
- /* for each page of u-area */
-4: mov.l @r1+, r7 /* upte->addr: u-area VPN */
- cmp/eq r7, r0 /* if (vpn == upte->addr) */
- bt/s 5f /* goto found; */
- dt r3
- bf/s 4b
- add #4, r1 /* skip upte->data; point to next md_upte[i] */
- /* not a page of u-area, proceed to handler */
- bra 7f /* pull insn at 6f into delay slot */
- mov #(SH3_EXPEVT & 0xff), r0
-
- /* load entry for this uarea page into tlb */
-5: mov #(SH3_PTEH & 0xff), r2
- mov.l @r1, r1 /* md_upte[i]->data */
- mov.l @r2, r3 /* save ASID */
- mov.l r0, @r2 /* SH3_PTEH = { VPN, ASID = 0 } */
- mov.l r1, @(4, r2) /* SH3_PTEL = md_upte[i]->data */
+ mov #(SH3_PTEH & 0xff), r4
+ mov.l .L3_VPN_cleanup, r0
+ mov.l @r4, r5
+ and r0, r5 /* trim vpn to 4K page boundary */
+ /*
+ * For the duration of fast path we keep
+ * r4: SH3_PTEH - other PTE regs are addressable as @(offset, r4)
+ * r5: { VPN, ASID } that caused the miss
+ */
- ldtlb
- bra 99f /* return */
- mov.l r3, @r2 /* restore ASID */
-#endif /* !P1_STACK */
- /* tlb_exception(curproc, trapframe, tea) */
-6: mov #(SH3_EXPEVT & 0xff), r0
-7: mov.l @r0, r0
- mov.l r0, @(TF_EXPEVT, r14) /* trapframe->tf_expevt = EXPEVT */
- mov.l .L3_curproc, r0
- mov.l @r0, r4 /* 1st arg: curproc */
+ cmp/pz r5 /* user space address? */
+ bt/s .L3_user_va
+ mov r5, r2 /* copy of vpn to compute indices into ptd/ptp */
+
+ /*
+ * kernel space address, use pmap_kernel(), adjust vpn for indexing
+ * see __pmap_kpte_lookup
+ */
+.L3_kernel_va:
+ mov.l .L3_VM_MIN_KERNEL_ADDRESS, r0
+ mov.l .L3_kernptd, r1 /* pmap_kernel()->pm_ptp */
+ bra .L3_fetch_pte
+ sub r0, r2 /* vpn -= VM_MIN_KERNEL_ADDRESS */
+
+ /* user space address, use curproc's pmap */
+.L3_user_va:
+ mov.l .L3_curptd, r1 /* curproc->...->pm_ptp */
+
+ /* see __pmap_pte_lookup */
+.L3_fetch_pte:
+ mov.l @r1, r3 /* fetch ptd */
+
+ /*
+ * r2: vpn, prepared for indexing into ptd
+ * r3: pt_entry_t **ptd => pt_entry_t *ptp => pt_entry_t pte
+ */
+#ifdef DEBUG
+ tst r3, r3 /* ptd == NULL - cannot happen */
+ bt/s .L3_call_tlb_exception
+#endif
+ mov #-22, r1 /* __PMAP_PTP_SHIFT */
+
+ /* __PMAP_PTP_INDEX(vpn) */
+ mov r2, r0
+ shld r1, r0 /* vpn >> __PMAP_PTP_SHIFT */
+ mov.l .L3_ptp_index_mask, r1
+ and r1, r0 /* ... & (__PMAP_PTP_N - 1) */
+ shll2 r0 /* array index -> array offset */
+ mov.l @(r0, r3), r3 /* ptp = ptd[idx] */
+ tst r3, r3 /* if (ptp == NULL) */
+ bt/s .L3_call_tlb_exception
+ mov #-(PGSHIFT - 2), r1
+
+ /*
+ * __PMAP_PTP_OFSET(vpn) - except we pre-shift 2 bits left to
+ * get the array offset directly, as we know bits 10 and 11
+ * are zero (we cleaned them in r5 to get 4K aligned VPN)
+ */
+ shld r1, r2 /* vpn >> (PGSHIFT - 2) */
+ mov.l .L3_ptp_offset_mask, r0
+ and r2, r0 /* ... & ((__PMAP_PTP_PG_N - 1) << 2) */
+ mov.l @(r0, r3), r3 /* pte = ptp[idx] */
+
+
+ /* r3: pte */
+ /* r4: SH3_PTEH */
+ /* r5: { VPN, ASID } */
+
+ mov.l .L3_PG_V, r0
+ tst r0, r3 /* if ((pte & PG_V) == 0) */
+ bt/s .L3_call_tlb_exception
+ nop
+
+ mov.l .L3_PG_HW_BITS, r1
+ cmp/pz r5 /* user space address? */
+ and r1, r3 /* pte &= PG_HW_BITS */
+ bf/s .L3_load_kernel
+ mov.l r3, @(0x04, r4) /* *SH3_PTEL = pte */
+
+ /*
+ * load mapping for a user space page
+ * we reload PTEH to enter VPN aligned to 4K page boundary
+ */
+.L3_load_user:
+ mov.l r5, @r4 /* *SH3_PTEH = { VPN, ASID } */
+ ldtlb /* needs 2 insns padding before RTE */
+ nop
+ nop
+ rte
+ nop
+
+ /*
+ * load mapping for a kernel space page
+ * we need to temporarily set ASID to 0
+ */
+.L3_load_kernel:
+ mov.l .L3_clear_ASID, r1
+ and r5, r1 /* *SH3_PTEH & ~SH3_PTEH_ASID_MASK */
+ mov.l r1, @r4 /* *SH3_PTEH = { VPN, ASID = 0 } */
+ ldtlb
+ mov.l r5, @r4 /* restore ASID */
+ nop
+ rte
+ nop
+
+
+ /*
+ * If we haven't found a valid mapping in the fast path
+ * tlb_exception(curproc, trapframe, tea)
+ */
+.L3_call_tlb_exception:
+ __EXCEPTION_ENTRY
+ mov.l .L3_SH3_EXPEVT, r2
+ mov.l .L3_curproc, r1
+ mov #(SH3_TEA & 0xff), r0
+ mov.l @r2, r2 /* *SH3_EXPEVT */
+ mov.l @r0, r6 /* arg3: va = *SH3_TEA */
+ mov.l @r1, r4 /* arg1: curproc */
__INTR_MASK(r0, r1)
__EXCEPTION_UNBLOCK(r0, r1)
mov.l .L3_tlb_exception, r0
+ mov.l r2, @(TF_EXPEVT, r14) /* tf->tf_expevt = EXPEVT */
jsr @r0
- mov r14, r5 /* 2nd arg: trap frame */
-99: __EXCEPTION_RETURN
+ mov r14, r5 /* arg2: trapframe */
+ __EXCEPTION_RETURN
- .align 5
-.L3_curproc: .long _C_LABEL(cpu_info_store) + CI_CURPROC
-.L3_tlb_exception: .long _C_LABEL(tlb_exception)
-.L3_VPN_MASK: .long 0xfffff000
-.L3_CURUPTE: .long _C_LABEL(curupte)
+ .align 4
+.L3_VPN_cleanup: .long ~0x00000c00
+.L3_curptd: .long _C_LABEL(curptd)
+.L3_kernptd: .long _C_LABEL(__pmap_kernel)
+.L3_VM_MIN_KERNEL_ADDRESS: .long VM_MIN_KERNEL_ADDRESS
+.L3_ptp_index_mask: .long 0x1ff
+.L3_ptp_offset_mask: .long 0x3ff << 2
+.L3_PG_HW_BITS: .long PG_HW_BITS
+.L3_PG_V: .long PG_V
+.L3_clear_ASID: .long ~SH3_PTEH_ASID_MASK
+.L3_SH3_EXPEVT: .long SH3_EXPEVT
+.L3_curproc: .long _C_LABEL(cpu_info_store) + CI_CURPROC
+.L3_tlb_exception: .long _C_LABEL(tlb_exception)
/* LINTSTUB: Var: char sh3_vector_tlbmiss_end[1]; */
VECTOR_END_MARKER(sh3_vector_tlbmiss_end)
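As a side note on the address math in the SH3 fast path above: it folds
__PMAP_PTP_INDEX() and the byte offset into ptp[] into two shift-and-mask
pairs. A hedged C restatement follows, using stand-in macro names and the
constants the assembly loads (22, 0x1ff, PGSHIFT and 0x3ff << 2); it
assumes 4K pages and 4-byte PTEs.

/* Stand-in macros for illustration, not the pmap.h originals. */
#define PTD_INDEX(vpn)		(((vpn) >> 22) & 0x1ff)		/* ptd[] array index */
#define PTP_BYTE_OFFSET(vpn)	(((vpn) >> (12 - 2)) & (0x3ff << 2))	/* ptp byte offset */
/*
 * PTP_BYTE_OFFSET folds the usual "index << 2" (pt_entry_t is 4 bytes)
 * into the shift and mask, matching the "pre-shift 2 bits left" comment
 * above; the VPN was already trimmed to a 4K boundary via .L3_VPN_cleanup.
 */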
@@ -211,31 +303,161 @@ VECTOR_END_MARKER(sh3_vector_tlbmiss_end)
/*
* LINTSTUB: Var: char sh4_vector_tlbmiss[1];
*
- * void sh4_vector_tlbmiss(void);
- * Copied to VBR+0x400. This code should be position independent
- * and maximum 512 bytes long (== 0x600 - 0x400).
+ * TLB miss vector. We run through the fast path first, checking if
+ * there's a valid mapping in curproc or kernel pmap. We do fast path
+ * with exceptions disabled, so no P3 addresses please (though we can
+ * use kernel stack if need be, as its TLB entries are wired). We can
+ * only use BANK1 registers, and of those r6 and r7 are already taken.
+ *
+ * If we don't find a valid mapping in the fast path, we do context
+ * save and call tlb exception handler.
+ *
+ * Copied to VBR+0x400. This code should be relocatable
+ * and maximum 512 bytes long (== 0x600 - 0x400).
*/
NENTRY(sh4_vector_tlbmiss)
+ mov.l .L4_SH4_PTEH, r4
+ mov.l .L4_VPN_cleanup, r0
+ mov.l @r4, r5
+ and r0, r5 /* trim vpn to 4K page boundary */
+ /*
+ * For the duration of fast path we keep
+ * r4: SH4_PTEH - other PTE regs are addressable as @(offset, r4)
+ * r5: { VPN, ASID } that caused the miss
+ */
+
+ cmp/pz r5 /* user space address? */
+ bt/s .L4_user_va
+ mov r5, r2 /* copy of vpn to compute indices into ptd/ptp */
+
+ /*
+ * kernel space address, use pmap_kernel(), adjust vpn for indexing
+ * see __pmap_kpte_lookup
+ */
+.L4_kernel_va:
+ mov.l .L4_VM_MIN_KERNEL_ADDRESS, r0
+ mov.l .L4_kernptd, r1 /* pmap_kernel()->pm_ptp */
+ bra .L4_fetch_pte
+ sub r0, r2 /* vpn -= VM_MIN_KERNEL_ADDRESS */
+
+ /* user space address, use curproc's pmap */
+.L4_user_va:
+ mov.l .L4_curptd, r1 /* curproc->...->pm_ptp */
+
+ /* see __pmap_pte_lookup */
+.L4_fetch_pte:
+ mov.l @r1, r3 /* fetch ptd */
+
+ /*
+ * r2: vpn, prepared for indexing into ptd
+ * r3: pt_entry_t **ptd => pt_entry_t *ptp => pt_entry_t pte
+ */
+#ifdef DEBUG
+ tst r3, r3 /* ptd == NULL - cannot happen */
+ bt/s .L4_call_tlb_exception
+#endif
+ mov #-22, r1 /* __PMAP_PTP_SHIFT */
+
+ /* __PMAP_PTP_INDEX(vpn) */
+ mov r2, r0
+ shld r1, r0 /* vpn >> __PMAP_PTP_SHIFT */
+ mov.l .L4_ptp_index_mask, r1
+ and r1, r0 /* ... & (__PMAP_PTP_N - 1) */
+ shll2 r0 /* array index -> array offset */
+ mov.l @(r0, r3), r3 /* ptp = ptd[idx] */
+ tst r3, r3 /* if (ptp == NULL) */
+ bt/s .L4_call_tlb_exception
+ mov #-(PGSHIFT - 2), r1
+
+ /*
+ * __PMAP_PTP_OFSET(vpn) - except we pre-shift 2 bits left to
+ * get the array offset directly, as we know bits 10 and 11
+ * are zero (we cleaned them in r5 to get 4K aligned VPN)
+ */
+ shld r1, r2 /* vpn >> (PGSHIFT - 2) */
+ mov.l .L4_ptp_offset_mask, r0
+ and r2, r0 /* ... & ((__PMAP_PTP_PG_N - 1) << 2) */
+ mov.l @(r0, r3), r3 /* pte = ptp[idx] */
+
+
+ /* r3: pte */
+ /* r4: SH4_PTEH */
+ /* r5: { VPN, ASID } */
+
+ mov.l .L4_PG_V, r0
+ tst r0, r3 /* if ((pte & PG_V) == 0) */
+ bt/s .L4_call_tlb_exception
+ mov r3, r0 /* prepare PCMCIA SA bits for SH4_PTEA */
+
+ mov.l .L4_PG_HW_BITS, r1
+ shlr8 r0
+ and r1, r3 /* pte &= PG_HW_BITS */
+ shlr r0 /* pte >> _PG_PCMCIA_SHIFT */
+ cmp/pz r5 /* user space address? */
+ and #SH4_PTEA_SA_MASK, r0
+ mov.l r3, @(0x04, r4) /* *SH4_PTEL = pte */
+ bf/s .L4_load_kernel
+ mov.l r0, @(0x34, r4) /* *SH4_PTEA = PCMCIA space attrs */
+
+ /*
+ * Load mapping for a user space page
+ * we reload PTEH to enter VPN aligned to 4K page boundary
+ */
+.L4_load_user:
+ mov.l r5, @r4 /* *SH4_PTEH = { VPN, ASID } */
+ ldtlb /* needs 1 insn padding before RTE */
+ nop
+ rte
+ nop
+
+ /*
+ * Load mapping for a kernel space page
+ * we need to temporarily set ASID to 0
+ */
+.L4_load_kernel:
+ mov.l .L4_clear_ASID, r1
+ and r5, r1 /* *SH4_PTEH & ~SH4_PTEH_ASID_MASK */
+ mov.l r1, @r4 /* *SH4_PTEH = { VPN, ASID = 0 } */
+ ldtlb
+ mov.l r5, @r4 /* restore ASID */
+ rte
+ nop
+
+
+ /*
+ * If we haven't found a valid mapping in the fast path
+ * tlb_exception(curproc, trapframe, tea)
+ */
+.L4_call_tlb_exception:
__EXCEPTION_ENTRY
- mov.l .L4_TEA4, r0
- mov.l @r0, r6
- mov.l .L4_EXPEVT4, r0
- mov.l @r0, r0
- mov.l r0, @(TF_EXPEVT, r14) /* trapframe->tf_expevt = EXPEVT */
- mov.l .L4_curproc, r0
- mov.l @r0, r4 /* 1st arg */
+ mov.l .L4_SH4_PTEH, r0
+ mov.l .L4_curproc, r1
+ mov.l @(0x24, r0), r2 /* *SH4_EXPEVT */
+ mov.l @(0x0c, r0), r6 /* arg3: va = *SH4_TEA */
+ mov.l @r1, r4 /* arg1: curproc */
__INTR_MASK(r0, r1)
__EXCEPTION_UNBLOCK(r0, r1)
mov.l .L4_tlb_exception, r0
+ mov.l r2, @(TF_EXPEVT, r14) /* tf->tf_expevt = EXPEVT */
jsr @r0
- mov r14, r5 /* 2nd arg */
+ mov r14, r5 /* arg2: trapframe */
__EXCEPTION_RETURN
.align 5
-.L4_tlb_exception: .long _C_LABEL(tlb_exception)
-.L4_curproc: .long _C_LABEL(cpu_info_store) + CI_CURPROC
-.L4_EXPEVT4: .long SH4_EXPEVT
-.L4_TEA4: .long SH4_TEA
+
+.L4_SH4_PTEH: .long SH4_PTEH
+.L4_VPN_cleanup: .long ~0x00000c00
+.L4_curptd: .long _C_LABEL(curptd)
+.L4_kernptd: .long _C_LABEL(__pmap_kernel)
+.L4_VM_MIN_KERNEL_ADDRESS: .long VM_MIN_KERNEL_ADDRESS
+.L4_ptp_index_mask: .long 0x1ff
+.L4_ptp_offset_mask: .long 0x3ff << 2
+.L4_PG_HW_BITS: .long PG_HW_BITS
+.L4_PG_V: .long PG_V
+.L4_clear_ASID: .long ~SH4_PTEH_ASID_MASK
+.L4_curproc: .long _C_LABEL(cpu_info_store) + CI_CURPROC
+.L4_tlb_exception: .long _C_LABEL(tlb_exception)
+
/* LINTSTUB: Var: char sh4_vector_tlbmiss_end[1]; */
VECTOR_END_MARKER(sh4_vector_tlbmiss_end)
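On the SH4 side the fast path additionally feeds SH4_PTEA with the PCMCIA
space attribute bits and forces ASID 0 around ldtlb for kernel mappings. A
hedged C rendering of those MMU register writes follows; SH4_PTEH, SH4_PTEL,
SH4_PTEA, PG_HW_BITS, _PG_PCMCIA_SHIFT, SH4_PTEA_SA_MASK and
SH4_PTEH_ASID_MASK are the names used in the hunk, while the function and
its arguments are illustrative only.

/*
 * Sketch only: what the SH4 fast path writes once a valid pte is found.
 * pteh_saved is the { VPN, ASID } value read from SH4_PTEH on entry
 * (r5 in the assembly); kernel_va says whether the VPN is a kernel one.
 */
static inline void
sh4_tlb_load_sketch(uint32_t pteh_saved, uint32_t pte, int kernel_va)
{
	*(volatile uint32_t *)SH4_PTEL = pte & PG_HW_BITS;
	*(volatile uint32_t *)SH4_PTEA =
	    (pte >> _PG_PCMCIA_SHIFT) & SH4_PTEA_SA_MASK;

	if (kernel_va) {
		/* enter the mapping with ASID 0, then put the ASID back */
		*(volatile uint32_t *)SH4_PTEH = pteh_saved & ~SH4_PTEH_ASID_MASK;
		__asm volatile("ldtlb");
		*(volatile uint32_t *)SH4_PTEH = pteh_saved;
	} else {
		*(volatile uint32_t *)SH4_PTEH = pteh_saved;	/* { VPN, ASID } */
		__asm volatile("ldtlb");
	}
}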