diff options
author | Christophe Leroy <christophe.leroy@csgroup.eu> | 2020-11-27 00:10:05 +1100 |
---|---|---|
committer | Michael Ellerman <mpe@ellerman.id.au> | 2020-12-04 01:01:10 +1100 |
commit | ab037dd87a2f946556850e204c06cbd7a2a19390 (patch) | |
tree | 4265cd2ac29adba67ac32725b9e21c3c97712483 /arch/powerpc/kernel/vdso64 | |
parent | powerpc/vdso: Save and restore TOC pointer on PPC64 (diff) | |
download | linux-dev-ab037dd87a2f946556850e204c06cbd7a2a19390.tar.xz linux-dev-ab037dd87a2f946556850e204c06cbd7a2a19390.zip |
powerpc/vdso: Switch VDSO to generic C implementation.
With the C VDSO, performance is slightly lower, but the switch is worth
it because it eases maintenance and evolution, and it also adds support
for clocks that the ASM VDSO does not handle.
On an 8xx at 132 MHz, vdsotest with the ASM VDSO:
gettimeofday: vdso: 828 nsec/call
clock-getres-realtime-coarse: vdso: 391 nsec/call
clock-gettime-realtime-coarse: vdso: 614 nsec/call
clock-getres-realtime: vdso: 460 nsec/call
clock-gettime-realtime: vdso: 876 nsec/call
clock-getres-monotonic-coarse: vdso: 399 nsec/call
clock-gettime-monotonic-coarse: vdso: 691 nsec/call
clock-getres-monotonic: vdso: 460 nsec/call
clock-gettime-monotonic: vdso: 1026 nsec/call
On an 8xx at 132 MHz, vdsotest with the C VDSO:
gettimeofday: vdso: 955 nsec/call
clock-getres-realtime-coarse: vdso: 545 nsec/call
clock-gettime-realtime-coarse: vdso: 592 nsec/call
clock-getres-realtime: vdso: 545 nsec/call
clock-gettime-realtime: vdso: 941 nsec/call
clock-getres-monotonic-coarse: vdso: 545 nsec/call
clock-gettime-monotonic-coarse: vdso: 591 nsec/call
clock-getres-monotonic: vdso: 545 nsec/call
clock-gettime-monotonic: vdso: 940 nsec/call
The C VDSO is even faster than the ASM VDSO for clock_gettime with monotonic clocks.
Clocks not supported by the ASM VDSO (these take the syscall fallback path):
clock-gettime-boottime: vdso: 3851 nsec/call
clock-gettime-tai: vdso: 3852 nsec/call
clock-gettime-monotonic-raw: vdso: 3396 nsec/call
Same clocks with C VDSO:
clock-gettime-tai: vdso: 941 nsec/call
clock-gettime-monotonic-raw: vdso: 1001 nsec/call
clock-gettime-monotonic-coarse: vdso: 591 nsec/call
On an 8321E at 333 MHz, vdsotest with the ASM VDSO:
gettimeofday: vdso: 220 nsec/call
clock-getres-realtime-coarse: vdso: 102 nsec/call
clock-gettime-realtime-coarse: vdso: 178 nsec/call
clock-getres-realtime: vdso: 129 nsec/call
clock-gettime-realtime: vdso: 235 nsec/call
clock-getres-monotonic-coarse: vdso: 105 nsec/call
clock-gettime-monotonic-coarse: vdso: 208 nsec/call
clock-getres-monotonic: vdso: 129 nsec/call
clock-gettime-monotonic: vdso: 274 nsec/call
On an 8321E at 333 MHz, vdsotest with the C VDSO:
gettimeofday: vdso: 272 nsec/call
clock-getres-realtime-coarse: vdso: 160 nsec/call
clock-gettime-realtime-coarse: vdso: 184 nsec/call
clock-getres-realtime: vdso: 166 nsec/call
clock-gettime-realtime: vdso: 281 nsec/call
clock-getres-monotonic-coarse: vdso: 160 nsec/call
clock-gettime-monotonic-coarse: vdso: 184 nsec/call
clock-getres-monotonic: vdso: 169 nsec/call
clock-gettime-monotonic: vdso: 275 nsec/call
On a Power9 Nimbus DD2.2 at 3.8 GHz, with the ASM VDSO:
clock-gettime-monotonic: vdso: 35 nsec/call
clock-getres-monotonic: vdso: 16 nsec/call
clock-gettime-monotonic-coarse: vdso: 18 nsec/call
clock-getres-monotonic-coarse: vdso: 522 nsec/call
clock-gettime-monotonic-raw: vdso: 598 nsec/call
clock-getres-monotonic-raw: vdso: 520 nsec/call
clock-gettime-realtime: vdso: 34 nsec/call
clock-getres-realtime: vdso: 16 nsec/call
clock-gettime-realtime-coarse: vdso: 18 nsec/call
clock-getres-realtime-coarse: vdso: 517 nsec/call
getcpu: vdso: 8 nsec/call
gettimeofday: vdso: 25 nsec/call
On the same Power9, with the C VDSO:
clock-gettime-monotonic: vdso: 37 nsec/call
clock-getres-monotonic: vdso: 20 nsec/call
clock-gettime-monotonic-coarse: vdso: 21 nsec/call
clock-getres-monotonic-coarse: vdso: 19 nsec/call
clock-gettime-monotonic-raw: vdso: 38 nsec/call
clock-getres-monotonic-raw: vdso: 20 nsec/call
clock-gettime-realtime: vdso: 37 nsec/call
clock-getres-realtime: vdso: 20 nsec/call
clock-gettime-realtime-coarse: vdso: 20 nsec/call
clock-getres-realtime-coarse: vdso: 19 nsec/call
getcpu: vdso: 8 nsec/call
gettimeofday: vdso: 28 nsec/call
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20201126131006.2431205-8-mpe@ellerman.id.au
Diffstat (limited to 'arch/powerpc/kernel/vdso64')
-rw-r--r-- | arch/powerpc/kernel/vdso64/Makefile | 23 | ||||
-rw-r--r-- | arch/powerpc/kernel/vdso64/gettimeofday.S | 242 | ||||
-rw-r--r-- | arch/powerpc/kernel/vdso64/vdso64.lds.S | 2 |
3 files changed, 25 insertions, 242 deletions
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile index dfd34f68bfa1..4a8c5e4d25c0 100644 --- a/arch/powerpc/kernel/vdso64/Makefile +++ b/arch/powerpc/kernel/vdso64/Makefile @@ -1,8 +1,20 @@ # SPDX-License-Identifier: GPL-2.0 # List of files in the vdso, has to be asm only for now +ARCH_REL_TYPE_ABS := R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_PPC_ADDR16|R_PPC_ADDR16_LO|R_PPC_ADDR16_HI|R_PPC_ADDR16_HA|R_PPC_ADDR14|R_PPC_ADDR14_BRTAKEN|R_PPC_ADDR14_BRNTAKEN +include $(srctree)/lib/vdso/Makefile + obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o +ifneq ($(c-gettimeofday-y),) + CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) + CFLAGS_vgettimeofday.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) + CFLAGS_vgettimeofday.o += $(call cc-option, -fno-stack-protector) + CFLAGS_vgettimeofday.o += -DDISABLE_BRANCH_PROFILING + CFLAGS_vgettimeofday.o += -ffreestanding -fasynchronous-unwind-tables + CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) +endif + # Build rules targets := $(obj-vdso64) vdso64.so vdso64.so.dbg @@ -11,6 +23,7 @@ obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) GCOV_PROFILE := n KCOV_INSTRUMENT := n UBSAN_SANITIZE := n +KASAN_SANITIZE := n ccflags-y := -shared -fno-common -fno-builtin -nostdlib \ -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both @@ -20,12 +33,14 @@ obj-y += vdso64_wrapper.o targets += vdso64.lds CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) +$(obj)/vgettimeofday.o: %.o: %.c FORCE + # Force dependency (incbin is bad) $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so # link rule for the .so file, .lds has to be first -$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) FORCE - $(call if_changed,vdso64ld) +$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday.o FORCE + $(call if_changed,vdso64ld_and_check) # strip rule for the .so file $(obj)/%.so: OBJCOPYFLAGS := -S @@ -33,8 +48,8 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) # 
actual build commands -quiet_cmd_vdso64ld = VDSO64L $@ - cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) +quiet_cmd_vdso64ld_and_check = VDSO64L $@ + cmd_vdso64ld_and_check = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^); $(cmd_vdso_check) # install commands for the unstripped file quiet_cmd_vdso_install = INSTALL $@ diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S index 20f8be40c653..d7a7bfb51081 100644 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S @@ -12,6 +12,7 @@ #include <asm/vdso_datapage.h> #include <asm/asm-offsets.h> #include <asm/unistd.h> +#include <asm/vdso/gettimeofday.h> .text /* @@ -21,31 +22,7 @@ * */ V_FUNCTION_BEGIN(__kernel_gettimeofday) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - - mr r11,r3 /* r11 holds tv */ - mr r10,r4 /* r10 holds tz */ - get_datapage r3, r0 - cmpldi r11,0 /* check if tv is NULL */ - beq 2f - lis r7,1000000@ha /* load up USEC_PER_SEC */ - addi r7,r7,1000000@l - bl V_LOCAL_FUNC(__do_get_tspec) /* get sec/us from tb & kernel */ - std r4,TVAL64_TV_SEC(r11) /* store sec in tv */ - std r5,TVAL64_TV_USEC(r11) /* store usec in tv */ -2: cmpldi r10,0 /* check if tz is NULL */ - beq 1f - lwz r4,CFG_TZ_MINUTEWEST(r3)/* fill tz */ - lwz r5,CFG_TZ_DSTTIME(r3) - stw r4,TZONE_TZ_MINWEST(r10) - stw r5,TZONE_TZ_DSTTIME(r10) -1: mtlr r12 - crclr cr0*4+so - li r3,0 /* always success */ - blr - .cfi_endproc + cvdso_call __c_kernel_gettimeofday V_FUNCTION_END(__kernel_gettimeofday) @@ -56,120 +33,7 @@ V_FUNCTION_END(__kernel_gettimeofday) * */ V_FUNCTION_BEGIN(__kernel_clock_gettime) - .cfi_startproc - /* Check for supported clock IDs */ - cmpwi cr0,r3,CLOCK_REALTIME - cmpwi cr1,r3,CLOCK_MONOTONIC - cror cr0*4+eq,cr0*4+eq,cr1*4+eq - - cmpwi cr5,r3,CLOCK_REALTIME_COARSE - cmpwi cr6,r3,CLOCK_MONOTONIC_COARSE - cror cr5*4+eq,cr5*4+eq,cr6*4+eq - - cror cr0*4+eq,cr0*4+eq,cr5*4+eq - bne 
cr0,99f - - mflr r12 /* r12 saves lr */ - .cfi_register lr,r12 - mr r11,r4 /* r11 saves tp */ - get_datapage r3, r0 - lis r7,NSEC_PER_SEC@h /* want nanoseconds */ - ori r7,r7,NSEC_PER_SEC@l - beq cr5,70f -50: bl V_LOCAL_FUNC(__do_get_tspec) /* get time from tb & kernel */ - bne cr1,80f /* if not monotonic, all done */ - - /* - * CLOCK_MONOTONIC - */ - - /* now we must fixup using wall to monotonic. We need to snapshot - * that value and do the counter trick again. Fortunately, we still - * have the counter value in r8 that was returned by __do_get_tspec. - * At this point, r4,r5 contain our sec/nsec values. - */ - - ld r6,WTOM_CLOCK_SEC(r3) - lwa r9,WTOM_CLOCK_NSEC(r3) - - /* We now have our result in r6,r9. We create a fake dependency - * on that result and re-check the counter - */ - or r0,r6,r9 - xor r0,r0,r0 - add r3,r3,r0 - ld r0,CFG_TB_UPDATE_COUNT(r3) - cmpld cr0,r0,r8 /* check if updated */ - bne- 50b - b 78f - - /* - * For coarse clocks we get data directly from the vdso data page, so - * we don't need to call __do_get_tspec, but we still need to do the - * counter trick. - */ -70: ld r8,CFG_TB_UPDATE_COUNT(r3) - andi. r0,r8,1 /* pending update ? loop */ - bne- 70b - add r3,r3,r0 /* r0 is already 0 */ - - /* - * CLOCK_REALTIME_COARSE, below values are needed for MONOTONIC_COARSE - * too - */ - ld r4,STAMP_XTIME_SEC(r3) - ld r5,STAMP_XTIME_NSEC(r3) - bne cr6,75f - - /* CLOCK_MONOTONIC_COARSE */ - ld r6,WTOM_CLOCK_SEC(r3) - lwa r9,WTOM_CLOCK_NSEC(r3) - - /* check if counter has updated */ - or r0,r6,r9 -75: or r0,r0,r4 - or r0,r0,r5 - xor r0,r0,r0 - add r3,r3,r0 - ld r0,CFG_TB_UPDATE_COUNT(r3) - cmpld cr0,r0,r8 /* check if updated */ - bne- 70b - - /* Counter has not updated, so continue calculating proper values for - * sec and nsec if monotonic coarse, or just return with the proper - * values for realtime. 
- */ - bne cr6,80f - - /* Add wall->monotonic offset and check for overflow or underflow */ -78: add r4,r4,r6 - add r5,r5,r9 - cmpd cr0,r5,r7 - cmpdi cr1,r5,0 - blt 79f - subf r5,r7,r5 - addi r4,r4,1 -79: bge cr1,80f - addi r4,r4,-1 - add r5,r5,r7 - -80: std r4,TSPC64_TV_SEC(r11) - std r5,TSPC64_TV_NSEC(r11) - - mtlr r12 - crclr cr0*4+so - li r3,0 - blr - - /* - * syscall fallback - */ -99: - li r0,__NR_clock_gettime - .cfi_restore lr - sc - blr - .cfi_endproc + cvdso_call __c_kernel_clock_gettime V_FUNCTION_END(__kernel_clock_gettime) @@ -180,34 +44,7 @@ V_FUNCTION_END(__kernel_clock_gettime) * */ V_FUNCTION_BEGIN(__kernel_clock_getres) - .cfi_startproc - /* Check for supported clock IDs */ - cmpwi cr0,r3,CLOCK_REALTIME - cmpwi cr1,r3,CLOCK_MONOTONIC - cror cr0*4+eq,cr0*4+eq,cr1*4+eq - bne cr0,99f - - mflr r12 - .cfi_register lr,r12 - get_datapage r3, r0 - lwz r5, CLOCK_HRTIMER_RES(r3) - mtlr r12 - li r3,0 - cmpldi cr0,r4,0 - crclr cr0*4+so - beqlr - std r3,TSPC64_TV_SEC(r4) - std r5,TSPC64_TV_NSEC(r4) - blr - - /* - * syscall fallback - */ -99: - li r0,__NR_clock_getres - sc - blr - .cfi_endproc + cvdso_call __c_kernel_clock_getres V_FUNCTION_END(__kernel_clock_getres) /* @@ -217,74 +54,5 @@ V_FUNCTION_END(__kernel_clock_getres) * */ V_FUNCTION_BEGIN(__kernel_time) - .cfi_startproc - mflr r12 - .cfi_register lr,r12 - - mr r11,r3 /* r11 holds t */ - get_datapage r3, r0 - - ld r4,STAMP_XTIME_SEC(r3) - - cmpldi r11,0 /* check if t is NULL */ - beq 2f - std r4,0(r11) /* store result at *t */ -2: mtlr r12 - crclr cr0*4+so - mr r3,r4 - blr - .cfi_endproc + cvdso_call_time __c_kernel_time V_FUNCTION_END(__kernel_time) - - -/* - * This is the core of clock_gettime() and gettimeofday(), - * it returns the current time in r4 (seconds) and r5. - * On entry, r7 gives the resolution of r5, either USEC_PER_SEC - * or NSEC_PER_SEC, giving r5 in microseconds or nanoseconds. - * It expects the datapage ptr in r3 and doesn't clobber it. - * It clobbers r0, r6 and r9. 
- * On return, r8 contains the counter value that can be reused. - * This clobbers cr0 but not any other cr field. - */ -V_FUNCTION_BEGIN(__do_get_tspec) - .cfi_startproc - /* check for update count & load values */ -1: ld r8,CFG_TB_UPDATE_COUNT(r3) - andi. r0,r8,1 /* pending update ? loop */ - bne- 1b - xor r0,r8,r8 /* create dependency */ - add r3,r3,r0 - - /* Get TB & offset it. We use the MFTB macro which will generate - * workaround code for Cell. - */ - MFTB(r6) - ld r9,CFG_TB_ORIG_STAMP(r3) - subf r6,r9,r6 - - /* Scale result */ - ld r5,CFG_TB_TO_XS(r3) - sldi r6,r6,12 /* compute time since stamp_xtime */ - mulhdu r6,r6,r5 /* in units of 2^-32 seconds */ - - /* Add stamp since epoch */ - ld r4,STAMP_XTIME_SEC(r3) - lwz r5,STAMP_SEC_FRAC(r3) - or r0,r4,r5 - or r0,r0,r6 - xor r0,r0,r0 - add r3,r3,r0 - ld r0,CFG_TB_UPDATE_COUNT(r3) - cmpld r0,r8 /* check if updated */ - bne- 1b /* reload if so */ - - /* convert to seconds & nanoseconds and add to stamp */ - add r6,r6,r5 /* add on fractional seconds of xtime */ - mulhwu r5,r6,r7 /* compute micro or nanoseconds and */ - srdi r6,r6,32 /* seconds since stamp_xtime */ - clrldi r5,r5,32 - add r4,r4,r6 - blr - .cfi_endproc -V_FUNCTION_END(__do_get_tspec) diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S index 256fb9720298..71be083b24ed 100644 --- a/arch/powerpc/kernel/vdso64/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -61,7 +61,6 @@ SECTIONS .gcc_except_table : { *(.gcc_except_table) } .rela.dyn ALIGN(8) : { *(.rela.dyn) } - .opd ALIGN(8) : { KEEP (*(.opd)) } .got ALIGN(8) : { *(.got .toc) } _end = .; @@ -111,6 +110,7 @@ SECTIONS *(.branch_lt) *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) + *(.opd) } } |