ARM: convert all "mov.* pc, reg" to "bx reg" for ARMv6+

ARMv6 and greater introduced a new instruction ("bx") which can be used to return from function calls. Recent CPUs perform better when the "bx lr" instruction is used rather than the "mov pc, lr" instruction, and this sequence is strongly recommended to be used by the ARM architecture manual (section A.4.1.1). We provide a new macro "ret" with all its variants for the condition code which will resolve to the appropriate instruction. Rather than doing this piecemeal, and miss some instances, change all the "mov pc" instances to use the new macro, with the exception of the "movs" instruction and the kprobes code. This allows us to detect the "mov pc, lr" case and fix it up - and also gives us the possibility of deploying this for other registers depending on the CPU selection. Reported-by: Will Deacon <will.deacon@arm.com> Tested-by: Stephen Warren <swarren@nvidia.com> # Tegra Jetson TK1 Tested-by: Robert Jarzmik <robert.jarzmik@free.fr> # mioa701_bootresume.S Tested-by: Andrew Lunn <andrew@lunn.ch> # Kirkwood Tested-by: Shawn Guo <shawn.guo@freescale.com> Tested-by: Tony Lindgren <tony@atomide.com> # OMAPs Tested-by: Gregory CLEMENT <gregory.clement@free-electrons.com> # Armada XP, 375, 385 Acked-by: Sekhar Nori <nsekhar@ti.com> # DaVinci Acked-by: Christoffer Dall <christoffer.dall@linaro.org> # kvm/hyp Acked-by: Haojian Zhuang <haojian.zhuang@gmail.com> # PXA3xx Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com> # Xen Tested-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de> # ARMv7M Tested-by: Simon Horman <horms+renesas@verge.net.au> # Shmobile Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
author: Russell King <rmk+kernel@arm.linux.org.uk> 2014-06-30 16:29:12 +0100
committer: Russell King <rmk+kernel@arm.linux.org.uk> 2014-07-18 12:29:04 +0100
commit: 6ebbf2ce437b33022d30badd49dc94d33ecfa498 (patch)
tree: bc015e35b456a28bb0e501803a454dc0c0d3291a /arch/arm/vfp
parent: ARM: make it easier to check the CPU part number correctly (diff)
download: linux-dev-6ebbf2ce437b33022d30badd49dc94d33ecfa498.tar.xz
linux-dev-6ebbf2ce437b33022d30badd49dc94d33ecfa498.zip
2 files changed, 15 insertions, 15 deletions
diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S
index fe6ca574d093..2e78760f3495 100644
--- a/arch/arm/vfp/entry.S
+++ b/arch/arm/vfp/entry.S
@@ -34,7 +34,7 @@ ENDPROC(do_vfp)
 
 ENTRY(vfp_null_entry)
 	dec_preempt_count_ti r10, r4
-	mov	pc, lr
+	ret	lr
 ENDPROC(vfp_null_entry)
 
 	.align	2
@@ -49,7 +49,7 @@ ENTRY(vfp_testing_entry)
 	dec_preempt_count_ti r10, r4
 	ldr	r0, VFP_arch_address
 	str	r0, [r0]		@ set to non-zero value
-	mov	pc, r9			@ we have handled the fault
+	ret	r9			@ we have handled the fault
 ENDPROC(vfp_testing_entry)
 
 	.align	2
diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
index be807625ed8c..cda654cbf2c2 100644
--- a/arch/arm/vfp/vfphw.S
+++ b/arch/arm/vfp/vfphw.S
@@ -183,7 +183,7 @@ vfp_hw_state_valid:
 					@ always subtract 4 from the following
 					@ instruction address.
 	dec_preempt_count_ti r10, r4
-	mov	pc, r9			@ we think we have handled things
+	ret	r9			@ we think we have handled things
 
 
 look_for_VFP_exceptions:
@@ -202,7 +202,7 @@ look_for_VFP_exceptions:
 
 	DBGSTR	"not VFP"
 	dec_preempt_count_ti r10, r4
-	mov	pc, lr
+	ret	lr
 
 process_exception:
 	DBGSTR	"bounce"
@@ -234,7 +234,7 @@ ENTRY(vfp_save_state)
 	VFPFMRX	r12, FPINST2		@ FPINST2 if needed (and present)
 1:
 	stmia	r0, {r1, r2, r3, r12}	@ save FPEXC, FPSCR, FPINST, FPINST2
-	mov	pc, lr
+	ret	lr
 ENDPROC(vfp_save_state)
 
 	.align
@@ -245,7 +245,7 @@ vfp_current_hw_state_address:
 #ifdef CONFIG_THUMB2_KERNEL
 	adr	\tmp, 1f
 	add	\tmp, \tmp, \base, lsl \shift
-	mov	pc, \tmp
+	ret	\tmp
 #else
 	add	pc, pc, \base, lsl \shift
 	mov	r0, r0
@@ -257,10 +257,10 @@ ENTRY(vfp_get_float)
 	tbl_branch r0, r3, #3
 	.irp	dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
 1:	mrc	p10, 0, r0, c\dr, c0, 0	@ fmrs	r0, s0
-	mov	pc, lr
+	ret	lr
 	.org	1b + 8
 1:	mrc	p10, 0, r0, c\dr, c0, 4	@ fmrs	r0, s1
-	mov	pc, lr
+	ret	lr
 	.org	1b + 8
 	.endr
 ENDPROC(vfp_get_float)
@@ -269,10 +269,10 @@ ENTRY(vfp_put_float)
 	tbl_branch r1, r3, #3
 	.irp	dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
 1:	mcr	p10, 0, r0, c\dr, c0, 0	@ fmsr	r0, s0
-	mov	pc, lr
+	ret	lr
 	.org	1b + 8
 1:	mcr	p10, 0, r0, c\dr, c0, 4	@ fmsr	r0, s1
-	mov	pc, lr
+	ret	lr
 	.org	1b + 8
 	.endr
 ENDPROC(vfp_put_float)
@@ -281,14 +281,14 @@ ENTRY(vfp_get_double)
 	tbl_branch r0, r3, #3
 	.irp	dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
 1:	fmrrd	r0, r1, d\dr
-	mov	pc, lr
+	ret	lr
 	.org	1b + 8
 	.endr
 #ifdef CONFIG_VFPv3
 	@ d16 - d31 registers
 	.irp	dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
 1:	mrrc	p11, 3, r0, r1, c\dr	@ fmrrd	r0, r1, d\dr
-	mov	pc, lr
+	ret	lr
 	.org	1b + 8
 	.endr
 #endif
@@ -296,21 +296,21 @@ ENTRY(vfp_get_double)
 	@ virtual register 16 (or 32 if VFPv3) for compare with zero
 	mov	r0, #0
 	mov	r1, #0
-	mov	pc, lr
+	ret	lr
 ENDPROC(vfp_get_double)
 
 ENTRY(vfp_put_double)
 	tbl_branch r2, r3, #3
 	.irp	dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
 1:	fmdrr	d\dr, r0, r1
-	mov	pc, lr
+	ret	lr
 	.org	1b + 8
 	.endr
 #ifdef CONFIG_VFPv3
 	@ d16 - d31 registers
 	.irp	dr,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
 1:	mcrr	p11, 3, r0, r1, c\dr	@ fmdrr	r0, r1, d\dr
-	mov	pc, lr
+	ret	lr
 	.org	1b + 8
 	.endr
 #endif
author	Russell King <rmk+kernel@arm.linux.org.uk>	2014-06-30 16:29:12 +0100
committer	Russell King <rmk+kernel@arm.linux.org.uk>	2014-07-18 12:29:04 +0100
commit	6ebbf2ce437b33022d30badd49dc94d33ecfa498 (patch)
tree	bc015e35b456a28bb0e501803a454dc0c0d3291a /arch/arm/vfp
parent	ARM: make it easier to check the CPU part number correctly (diff)
download	linux-dev-6ebbf2ce437b33022d30badd49dc94d33ecfa498.tar.xz linux-dev-6ebbf2ce437b33022d30badd49dc94d33ecfa498.zip