aboutsummaryrefslogtreecommitdiffstats
path: root/arch/tile/kernel/intvec_64.S
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-09-06 11:14:33 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2013-09-06 11:14:33 -0700
commit4de9ad9bc08b4953fc03336ad38908496e2f8826 (patch)
treebd44add223061a58317034a0d6c9686d95d12fba /arch/tile/kernel/intvec_64.S
parentMerge tag 'arm64-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/cmarinas/linux-aarch64 (diff)
parenttile: refresh tile defconfig files (diff)
downloadlinux-dev-4de9ad9bc08b4953fc03336ad38908496e2f8826.tar.xz
linux-dev-4de9ad9bc08b4953fc03336ad38908496e2f8826.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/cmetcalf/linux-tile
Pull Tile arch updates from Chris Metcalf: "These changes bring in a bunch of new functionality that has been maintained internally at Tilera over the last year, plus other stray bits of work that I've taken into the tile tree from other folks. The changes include some PCI root complex work, interrupt-driven console support, support for performing fast-path unaligned data fixups by kernel-based JIT code generation, CONFIG_PREEMPT support, vDSO support for gettimeofday(), a serial driver for the tilegx on-chip UART, KGDB support, more optimized string routines, support for ftrace and kprobes, improved ASLR, and many bug fixes. We also remove support for the old TILE64 chip, which is no longer buildable" * git://git.kernel.org/pub/scm/linux/kernel/git/cmetcalf/linux-tile: (85 commits) tile: refresh tile defconfig files tile: rework <asm/cmpxchg.h> tile PCI RC: make default consistent DMA mask 32-bit tile: add null check for kzalloc in tile/kernel/setup.c tile: make __write_once a synonym for __read_mostly tile: remove support for TILE64 tile: use asm-generic/bitops/builtin-*.h tile: eliminate no-op "noatomichash" boot argument tile: use standard tile_bundle_bits type in traps.c tile: simplify code referencing hypervisor API addresses tile: change <asm/system.h> to <asm/switch_to.h> in comments tile: mark pcibios_init() as __init tile: check for correct compiler earlier in asm-offsets.c tile: use standard 'generic-y' model for <asm/hw_irq.h> tile: use asm-generic version of <asm/local64.h> tile PCI RC: add comment about "PCI hole" problem tile: remove DEBUG_EXTRA_FLAGS kernel config option tile: add virt_to_kpte() API and clean up and document behavior tile: support FRAME_POINTER tile: support reporting Tilera hypervisor statistics ...
Diffstat (limited to 'arch/tile/kernel/intvec_64.S')
-rw-r--r--arch/tile/kernel/intvec_64.S305
1 files changed, 275 insertions, 30 deletions
diff --git a/arch/tile/kernel/intvec_64.S b/arch/tile/kernel/intvec_64.S
index 85d483957027..ec755d3f3734 100644
--- a/arch/tile/kernel/intvec_64.S
+++ b/arch/tile/kernel/intvec_64.S
@@ -17,25 +17,33 @@
#include <linux/linkage.h>
#include <linux/errno.h>
#include <linux/unistd.h>
+#include <linux/init.h>
#include <asm/ptrace.h>
#include <asm/thread_info.h>
#include <asm/irqflags.h>
#include <asm/asm-offsets.h>
#include <asm/types.h>
+#include <asm/traps.h>
#include <asm/signal.h>
#include <hv/hypervisor.h>
#include <arch/abi.h>
#include <arch/interrupts.h>
#include <arch/spr_def.h>
-#ifdef CONFIG_PREEMPT
-# error "No support for kernel preemption currently"
-#endif
-
#define PTREGS_PTR(reg, ptreg) addli reg, sp, C_ABI_SAVE_AREA_SIZE + (ptreg)
#define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR)
+#if CONFIG_KERNEL_PL == 1 || CONFIG_KERNEL_PL == 2
+/*
+ * Set "result" non-zero if ex1 holds the PL of the kernel
+ * (with or without ICS being set). Note this works only
+ * because we never find the PL at level 3.
+ */
+# define IS_KERNEL_EX1(result, ex1) andi result, ex1, CONFIG_KERNEL_PL
+#else
+# error Recode IS_KERNEL_EX1 for CONFIG_KERNEL_PL
+#endif
.macro push_reg reg, ptr=sp, delta=-8
{
@@ -98,6 +106,185 @@
}
.endm
+ /*
+ * Unalign data exception fast handling: In order to handle
+ * unaligned data access, a fast JIT version is generated and stored
+ * in a specific area in user space. We first need to do a quick poke
+ * to see if the JIT is available. We use certain bits in the fault
+ * PC (3 to 9 is used for 16KB page size) as index to address the JIT
+ * code area. The first 64bit word is the fault PC, and the 2nd one is
+ * the fault bundle itself. If these 2 words both match, then we
+ * directly "iret" to JIT code. If not, a slow path is invoked to
+ * generate new JIT code. Note: the current JIT code WILL be
+ * overwritten if it existed. So, ideally we can handle 128 unalign
+ * fixups via JIT. For lookup efficiency and to effectively support
+ * tight loops with multiple unaligned reference, a simple
+ * direct-mapped cache is used.
+ *
+ * SPR_EX_CONTEXT_K_0 is modified to return to JIT code.
+ * SPR_EX_CONTEXT_K_1 has ICS set.
+ * SPR_EX_CONTEXT_0_0 is setup to user program's next PC.
+ * SPR_EX_CONTEXT_0_1 = 0.
+ */
+ .macro int_hand_unalign_fast vecnum, vecname
+ .org (\vecnum << 8)
+intvec_\vecname:
+ /* Put r3 in SPR_SYSTEM_SAVE_K_1. */
+ mtspr SPR_SYSTEM_SAVE_K_1, r3
+
+ mfspr r3, SPR_EX_CONTEXT_K_1
+ /*
+ * Examine if exception comes from user without ICS set.
+ * If not, just go directly to the slow path.
+ */
+ bnez r3, hand_unalign_slow_nonuser
+
+ mfspr r3, SPR_SYSTEM_SAVE_K_0
+
+ /* Get &thread_info->unalign_jit_tmp[0] in r3. */
+ bfexts r3, r3, 0, CPU_SHIFT-1
+ mm r3, zero, LOG2_THREAD_SIZE, 63
+ addli r3, r3, THREAD_INFO_UNALIGN_JIT_TMP_OFFSET
+
+ /*
+ * Save r0, r1, r2 into thread_info array r3 points to
+ * from low to high memory in order.
+ */
+ st_add r3, r0, 8
+ st_add r3, r1, 8
+ {
+ st_add r3, r2, 8
+ andi r2, sp, 7
+ }
+
+ /* Save stored r3 value so we can revert it on a page fault. */
+ mfspr r1, SPR_SYSTEM_SAVE_K_1
+ st r3, r1
+
+ {
+ /* Generate a SIGBUS if sp is not 8-byte aligned. */
+ bnez r2, hand_unalign_slow_badsp
+ }
+
+ /*
+ * Get the thread_info in r0; load r1 with pc. Set the low bit of sp
+ * as an indicator to the page fault code in case we fault.
+ */
+ {
+ ori sp, sp, 1
+ mfspr r1, SPR_EX_CONTEXT_K_0
+ }
+
+ /* Add the jit_info offset in thread_info; extract r1 [3:9] into r2. */
+ {
+ addli r0, r3, THREAD_INFO_UNALIGN_JIT_BASE_OFFSET - \
+ (THREAD_INFO_UNALIGN_JIT_TMP_OFFSET + (3 * 8))
+ bfextu r2, r1, 3, (2 + PAGE_SHIFT - UNALIGN_JIT_SHIFT)
+ }
+
+ /* Load the jit_info; multiply r2 by 128. */
+ {
+ ld r0, r0
+ shli r2, r2, UNALIGN_JIT_SHIFT
+ }
+
+ /*
+ * If r0 is NULL, the JIT page is not mapped, so go to slow path;
+ * add offset r2 to r0 at the same time.
+ */
+ {
+ beqz r0, hand_unalign_slow
+ add r2, r0, r2
+ }
+
+ /*
+ * We are loading from userspace (both the JIT info PC and
+ * instruction word, and the instruction word we executed)
+ * and since either could fault while holding the interrupt
+ * critical section, we must tag this region and check it in
+ * do_page_fault() to handle it properly.
+ */
+ENTRY(__start_unalign_asm_code)
+
+ /* Load first word of JIT in r0 and increment r2 by 8. */
+ ld_add r0, r2, 8
+
+ /*
+ * Compare the PC with the 1st word in JIT; load the fault bundle
+ * into r1.
+ */
+ {
+ cmpeq r0, r0, r1
+ ld r1, r1
+ }
+
+ /* Go to slow path if PC doesn't match. */
+ beqz r0, hand_unalign_slow
+
+ /*
+ * Load the 2nd word of JIT, which is supposed to be the fault
+ * bundle for a cache hit. Increment r2; after this bundle r2 will
+ * point to the potential start of the JIT code we want to run.
+ */
+ ld_add r0, r2, 8
+
+ /* No further accesses to userspace are done after this point. */
+ENTRY(__end_unalign_asm_code)
+
+ /* Compare the real bundle with what is saved in the JIT area. */
+ {
+ cmpeq r0, r1, r0
+ mtspr SPR_EX_CONTEXT_0_1, zero
+ }
+
+ /* Go to slow path if the fault bundle does not match. */
+ beqz r0, hand_unalign_slow
+
+ /*
+ * A cache hit is found.
+ * r2 points to start of JIT code (3rd word).
+ * r0 is the fault pc.
+ * r1 is the fault bundle.
+ * Reset the low bit of sp.
+ */
+ {
+ mfspr r0, SPR_EX_CONTEXT_K_0
+ andi sp, sp, ~1
+ }
+
+ /* Write r2 into EX_CONTEXT_K_0 and increment PC. */
+ {
+ mtspr SPR_EX_CONTEXT_K_0, r2
+ addi r0, r0, 8
+ }
+
+ /*
+ * Set ICS on kernel EX_CONTEXT_K_1 in order to "iret" to
+ * user with ICS set. This way, if the JIT fixup causes another
+ * unalign exception (which shouldn't be possible) the user
+ * process will be terminated with SIGBUS. Also, our fixup will
+ * run without interleaving with external interrupts.
+ * Each fixup is at most 14 bundles, so it won't hold ICS for long.
+ */
+ {
+ movei r1, PL_ICS_EX1(USER_PL, 1)
+ mtspr SPR_EX_CONTEXT_0_0, r0
+ }
+
+ {
+ mtspr SPR_EX_CONTEXT_K_1, r1
+ addi r3, r3, -(3 * 8)
+ }
+
+ /* Restore r0..r3. */
+ ld_add r0, r3, 8
+ ld_add r1, r3, 8
+ ld_add r2, r3, 8
+ ld r3, r3
+
+ iret
+ ENDPROC(intvec_\vecname)
+ .endm
#ifdef __COLLECT_LINKER_FEEDBACK__
.pushsection .text.intvec_feedback,"ax"
@@ -118,15 +305,21 @@ intvec_feedback:
* The "processing" argument specifies the code for processing
* the interrupt. Defaults to "handle_interrupt".
*/
- .macro int_hand vecnum, vecname, c_routine, processing=handle_interrupt
- .org (\vecnum << 8)
+ .macro __int_hand vecnum, vecname, c_routine,processing=handle_interrupt
intvec_\vecname:
/* Temporarily save a register so we have somewhere to work. */
mtspr SPR_SYSTEM_SAVE_K_1, r0
mfspr r0, SPR_EX_CONTEXT_K_1
- andi r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
+ /*
+ * The unalign data fastpath code sets the low bit in sp to
+ * force us to reset it here on fault.
+ */
+ {
+ blbs sp, 2f
+ IS_KERNEL_EX1(r0, r0)
+ }
.ifc \vecnum, INT_DOUBLE_FAULT
/*
@@ -176,15 +369,15 @@ intvec_\vecname:
}
.endif
-
+2:
/*
- * SYSTEM_SAVE_K_0 holds the cpu number in the low bits, and
- * the current stack top in the higher bits. So we recover
- * our stack top by just masking off the low bits, then
+ * SYSTEM_SAVE_K_0 holds the cpu number in the high bits, and
+ * the current stack top in the lower bits. So we recover
+ * our starting stack value by sign-extending the low bits, then
* point sp at the top aligned address on the actual stack page.
*/
mfspr r0, SPR_SYSTEM_SAVE_K_0
- mm r0, zero, LOG2_THREAD_SIZE, 63
+ bfexts r0, r0, 0, CPU_SHIFT-1
0:
/*
@@ -206,6 +399,9 @@ intvec_\vecname:
* cache line 1: r6...r13
* cache line 0: 2 x frame, r0..r5
*/
+#if STACK_TOP_DELTA != 64
+#error STACK_TOP_DELTA must be 64 for assumptions here and in task_pt_regs()
+#endif
andi r0, r0, -64
/*
@@ -305,7 +501,7 @@ intvec_\vecname:
mfspr r3, SPR_SYSTEM_SAVE_K_2 /* info about page fault */
.else
.ifc \vecnum, INT_ILL_TRANS
- mfspr r2, ILL_TRANS_REASON
+ mfspr r2, ILL_VA_PC
.else
.ifc \vecnum, INT_DOUBLE_FAULT
mfspr r2, SPR_SYSTEM_SAVE_K_2 /* double fault info from HV */
@@ -315,12 +511,10 @@ intvec_\vecname:
.else
.ifc \c_routine, op_handle_perf_interrupt
mfspr r2, PERF_COUNT_STS
-#if CHIP_HAS_AUX_PERF_COUNTERS()
.else
.ifc \c_routine, op_handle_aux_perf_interrupt
mfspr r2, AUX_PERF_COUNT_STS
.endif
-#endif
.endif
.endif
.endif
@@ -339,7 +533,7 @@ intvec_\vecname:
#ifdef __COLLECT_LINKER_FEEDBACK__
.pushsection .text.intvec_feedback,"ax"
.org (\vecnum << 5)
- FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt1, 1 << 8)
+ FEEDBACK_ENTER_EXPLICIT(intvec_\vecname, .intrpt, 1 << 8)
jrp lr
.popsection
#endif
@@ -455,11 +649,12 @@ intvec_\vecname:
/*
* If we will be returning to the kernel, we will need to
* reset the interrupt masks to the state they had before.
- * Set DISABLE_IRQ in flags iff we came from PL1 with irqs disabled.
+ * Set DISABLE_IRQ in flags iff we came from kernel pl with
+ * irqs disabled.
*/
mfspr r32, SPR_EX_CONTEXT_K_1
{
- andi r32, r32, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
+ IS_KERNEL_EX1(r22, r22)
PTREGS_PTR(r21, PTREGS_OFFSET_FLAGS)
}
beqzt r32, 1f /* zero if from user space */
@@ -503,7 +698,7 @@ intvec_\vecname:
}
{
shl16insli r21, r21, hw1(__per_cpu_offset)
- bfextu r20, r20, 0, LOG2_THREAD_SIZE-1
+ bfextu r20, r20, CPU_SHIFT, 63
}
shl16insli r21, r21, hw0(__per_cpu_offset)
shl3add r20, r20, r21
@@ -585,7 +780,7 @@ intvec_\vecname:
.macro dc_dispatch vecnum, vecname
.org (\vecnum << 8)
intvec_\vecname:
- j hv_downcall_dispatch
+ j _hv_downcall_dispatch
ENDPROC(intvec_\vecname)
.endm
@@ -626,14 +821,36 @@ STD_ENTRY(interrupt_return)
PTREGS_PTR(r29, PTREGS_OFFSET_EX1)
}
ld r29, r29
- andi r29, r29, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
+ IS_KERNEL_EX1(r29, r29)
{
beqzt r29, .Lresume_userspace
- PTREGS_PTR(r29, PTREGS_OFFSET_PC)
+ move r29, sp
+ }
+
+#ifdef CONFIG_PREEMPT
+ /* Returning to kernel space. Check if we need preemption. */
+ EXTRACT_THREAD_INFO(r29)
+ addli r28, r29, THREAD_INFO_FLAGS_OFFSET
+ {
+ ld r28, r28
+ addli r29, r29, THREAD_INFO_PREEMPT_COUNT_OFFSET
+ }
+ {
+ andi r28, r28, _TIF_NEED_RESCHED
+ ld4s r29, r29
}
+ beqzt r28, 1f
+ bnez r29, 1f
+ jal preempt_schedule_irq
+ FEEDBACK_REENTER(interrupt_return)
+1:
+#endif
/* If we're resuming to _cpu_idle_nap, bump PC forward by 8. */
- moveli r27, hw2_last(_cpu_idle_nap)
+ {
+ moveli r27, hw2_last(_cpu_idle_nap)
+ PTREGS_PTR(r29, PTREGS_OFFSET_PC)
+ }
{
ld r28, r29
shl16insli r27, r27, hw1(_cpu_idle_nap)
@@ -728,7 +945,7 @@ STD_ENTRY(interrupt_return)
PTREGS_PTR(r32, PTREGS_OFFSET_FLAGS)
}
{
- andi r0, r0, SPR_EX_CONTEXT_1_1__PL_MASK
+ IS_KERNEL_EX1(r0, r0)
ld r32, r32
}
bnez r0, 1f
@@ -799,7 +1016,7 @@ STD_ENTRY(interrupt_return)
pop_reg r21, sp, PTREGS_OFFSET_REG(31) - PTREGS_OFFSET_PC
{
mtspr SPR_EX_CONTEXT_K_1, lr
- andi lr, lr, SPR_EX_CONTEXT_1_1__PL_MASK /* mask off ICS */
+ IS_KERNEL_EX1(lr, lr)
}
{
mtspr SPR_EX_CONTEXT_K_0, r21
@@ -1223,10 +1440,31 @@ STD_ENTRY(_sys_clone)
j sys_clone
STD_ENDPROC(_sys_clone)
-/* The single-step support may need to read all the registers. */
+ /*
+ * Recover r3, r2, r1 and r0 here saved by unalign fast vector.
+ * The vector area limit is 32 bundles, so we handle the reload here.
+ * r0, r1, r2 are in thread_info from low to high memory in order.
+ * r3 points to location the original r3 was saved.
+ * We put this code in the __HEAD section so it can be reached
+ * via a conditional branch from the fast path.
+ */
+ __HEAD
+hand_unalign_slow:
+ andi sp, sp, ~1
+hand_unalign_slow_badsp:
+ addi r3, r3, -(3 * 8)
+ ld_add r0, r3, 8
+ ld_add r1, r3, 8
+ ld r2, r3
+hand_unalign_slow_nonuser:
+ mfspr r3, SPR_SYSTEM_SAVE_K_1
+ __int_hand INT_UNALIGN_DATA, UNALIGN_DATA_SLOW, int_unalign
+
+/* The unaligned data support needs to read all the registers. */
int_unalign:
push_extra_callee_saves r0
- j do_trap
+ j do_unaligned
+ENDPROC(hand_unalign_slow)
/* Fill the return address stack with nonzero entries. */
STD_ENTRY(fill_ra_stack)
@@ -1240,8 +1478,15 @@ STD_ENTRY(fill_ra_stack)
4: jrp r0
STD_ENDPROC(fill_ra_stack)
-/* Include .intrpt1 array of interrupt vectors */
- .section ".intrpt1", "ax"
+ .macro int_hand vecnum, vecname, c_routine, processing=handle_interrupt
+ .org (\vecnum << 8)
+ __int_hand \vecnum, \vecname, \c_routine, \processing
+ .endm
+
+/* Include .intrpt array of interrupt vectors */
+ .section ".intrpt", "ax"
+ .global intrpt_start
+intrpt_start:
#define op_handle_perf_interrupt bad_intr
#define op_handle_aux_perf_interrupt bad_intr
@@ -1272,7 +1517,7 @@ STD_ENTRY(fill_ra_stack)
int_hand INT_SWINT_1, SWINT_1, SYSCALL, handle_syscall
int_hand INT_SWINT_0, SWINT_0, do_trap
int_hand INT_ILL_TRANS, ILL_TRANS, do_trap
- int_hand INT_UNALIGN_DATA, UNALIGN_DATA, int_unalign
+ int_hand_unalign_fast INT_UNALIGN_DATA, UNALIGN_DATA
int_hand INT_DTLB_MISS, DTLB_MISS, do_page_fault
int_hand INT_DTLB_ACCESS, DTLB_ACCESS, do_page_fault
int_hand INT_IDN_FIREWALL, IDN_FIREWALL, do_hardwall_trap