From 4c5fe5db1ac6420ec9c0a62ab764572ef79472b3 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Fri, 19 Oct 2018 20:48:12 -0400 Subject: parisc: Optimze cache flush algorithms The attached patch implements three optimizations: 1) Loops in flush_user_dcache_range_asm, flush_kernel_dcache_range_asm, purge_kernel_dcache_range_asm, flush_user_icache_range_asm, and flush_kernel_icache_range_asm are unrolled to reduce branch overhead. 2) The static branch prediction for cmpb instructions in pacache.S have been reviewed and the operand order adjusted where necessary. 3) For flush routines in cache.c, we purge rather flush when we have no context. The pdc instruction at level 0 is not required to write back dirty lines to memory. This provides a performance improvement over the fdc instruction if the feature is implemented. Version 2 adds alternative patching. The patch provides an average improvement of about 2%. Signed-off-by: John David Anglin Signed-off-by: Helge Deller --- arch/parisc/kernel/pacache.S | 216 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 202 insertions(+), 14 deletions(-) (limited to 'arch/parisc/kernel/pacache.S') diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S index b41c0136a05f..187f032c9dd8 100644 --- a/arch/parisc/kernel/pacache.S +++ b/arch/parisc/kernel/pacache.S @@ -838,8 +838,7 @@ ENTRY_CFI(flush_dcache_page_asm) add %r28, %r25, %r25 sub %r25, r31, %r25 - -1: fdc,m r31(%r28) +1: fdc,m r31(%r28) fdc,m r31(%r28) fdc,m r31(%r28) fdc,m r31(%r28) @@ -854,7 +853,7 @@ ENTRY_CFI(flush_dcache_page_asm) fdc,m r31(%r28) fdc,m r31(%r28) fdc,m r31(%r28) - cmpb,COND(<<) %r28, %r25,1b + cmpb,COND(>>) %r25, %r28, 1b /* predict taken */ fdc,m r31(%r28) 89: ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP) @@ -863,6 +862,67 @@ ENTRY_CFI(flush_dcache_page_asm) nop ENDPROC_CFI(flush_dcache_page_asm) +ENTRY_CFI(purge_dcache_page_asm) + ldil L%(TMPALIAS_MAP_START), %r28 +#ifdef CONFIG_64BIT +#if (TMPALIAS_MAP_START >= 0x80000000) + depdi 0, 31,32, %r28 /* clear any sign extension */ +#endif + convert_phys_for_tlb_insert20 %r26 /* convert phys addr to tlb insert format */ + depd %r25, 63,22, %r28 /* Form aliased virtual address 'to' */ + depdi 0, 63,PAGE_SHIFT, %r28 /* Clear any offset bits */ +#else + extrw,u %r26, 24,25, %r26 /* convert phys addr to tlb insert format */ + depw %r25, 31,22, %r28 /* Form aliased virtual address 'to' */ + depwi 0, 31,PAGE_SHIFT, %r28 /* Clear any offset bits */ +#endif + + /* Purge any old translation */ + +#ifdef CONFIG_PA20 + pdtlb,l %r0(%r28) +#else + tlb_lock %r20,%r21,%r22 +0: pdtlb %r0(%r28) + tlb_unlock %r20,%r21,%r22 + ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB) +#endif + +88: ldil L%dcache_stride, %r1 + ldw R%dcache_stride(%r1), r31 + +#ifdef CONFIG_64BIT + depdi,z 1, 63-PAGE_SHIFT,1, %r25 +#else + depwi,z 1, 31-PAGE_SHIFT,1, %r25 +#endif + add %r28, %r25, %r25 + sub %r25, r31, %r25 + +1: pdc,m r31(%r28) + pdc,m r31(%r28) + pdc,m r31(%r28) + pdc,m r31(%r28) + pdc,m r31(%r28) + pdc,m r31(%r28) + pdc,m r31(%r28) + pdc,m r31(%r28) + pdc,m r31(%r28) + pdc,m r31(%r28) + pdc,m r31(%r28) + pdc,m r31(%r28) + pdc,m r31(%r28) + pdc,m r31(%r28) + pdc,m r31(%r28) + cmpb,COND(>>) %r25, %r28, 1b /* predict taken */ + pdc,m r31(%r28) + +89: ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP) + sync + bv %r0(%r2) + nop +ENDPROC_CFI(purge_dcache_page_asm) + ENTRY_CFI(flush_icache_page_asm) ldil L%(TMPALIAS_MAP_START), %r28 #ifdef CONFIG_64BIT @@ -908,7 +968,6 @@ ENTRY_CFI(flush_icache_page_asm) add %r28, %r25, %r25 sub %r25, %r31, %r25 - /* fic only has the type 26 form on PA1.1, requiring an * explicit space specification, so use %sr4 */ 1: fic,m %r31(%sr4,%r28) @@ -926,7 +985,7 @@ ENTRY_CFI(flush_icache_page_asm) fic,m %r31(%sr4,%r28) fic,m %r31(%sr4,%r28) fic,m %r31(%sr4,%r28) - cmpb,COND(<<) %r28, %r25,1b + cmpb,COND(>>) %r25, %r28, 1b /* predict taken */ fic,m %r31(%sr4,%r28) 89: ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP) @@ -947,7 +1006,6 @@ ENTRY_CFI(flush_kernel_dcache_page_asm) add %r26, %r25, %r25 sub %r25, %r23, %r25 - 1: fdc,m %r23(%r26) fdc,m %r23(%r26) fdc,m %r23(%r26) @@ -963,7 +1021,7 @@ ENTRY_CFI(flush_kernel_dcache_page_asm) fdc,m %r23(%r26) fdc,m %r23(%r26) fdc,m %r23(%r26) - cmpb,COND(<<) %r26, %r25,1b + cmpb,COND(>>) %r25, %r26, 1b /* predict taken */ fdc,m %r23(%r26) 89: ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP) @@ -999,7 +1057,7 @@ ENTRY_CFI(purge_kernel_dcache_page_asm) pdc,m %r23(%r26) pdc,m %r23(%r26) pdc,m %r23(%r26) - cmpb,COND(<<) %r26, %r25, 1b + cmpb,COND(>>) %r25, %r26, 1b /* predict taken */ pdc,m %r23(%r26) 89: ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP) @@ -1014,7 +1072,33 @@ ENTRY_CFI(flush_user_dcache_range_asm) ldo -1(%r23), %r21 ANDCM %r26, %r21, %r26 -1: cmpb,COND(<<),n %r26, %r25, 1b +#ifdef CONFIG_64BIT + depd,z %r23, 59, 60, %r21 +#else + depw,z %r23, 27, 28, %r21 +#endif + add %r26, %r21, %r22 + cmpb,COND(>>),n %r22, %r25, 2f /* predict not taken */ +1: add %r22, %r21, %r22 + fdc,m %r23(%sr3, %r26) + fdc,m %r23(%sr3, %r26) + fdc,m %r23(%sr3, %r26) + fdc,m %r23(%sr3, %r26) + fdc,m %r23(%sr3, %r26) + fdc,m %r23(%sr3, %r26) + fdc,m %r23(%sr3, %r26) + fdc,m %r23(%sr3, %r26) + fdc,m %r23(%sr3, %r26) + fdc,m %r23(%sr3, %r26) + fdc,m %r23(%sr3, %r26) + fdc,m %r23(%sr3, %r26) + fdc,m %r23(%sr3, %r26) + fdc,m %r23(%sr3, %r26) + fdc,m %r23(%sr3, %r26) + cmpb,COND(<<=) %r22, %r25, 1b /* predict taken */ + fdc,m %r23(%sr3, %r26) + +2: cmpb,COND(>>),n %r25, %r26, 2b fdc,m %r23(%sr3, %r26) 89: ALTERNATIVE(88b, 89b, ALT_COND_NO_DCACHE, INSN_NOP) @@ -1029,7 +1113,33 @@ ENTRY_CFI(flush_kernel_dcache_range_asm) ldo -1(%r23), %r21 ANDCM %r26, %r21, %r26 -1: cmpb,COND(<<),n %r26, %r25,1b +#ifdef CONFIG_64BIT + depd,z %r23, 59, 60, %r21 +#else + depw,z %r23, 27, 28, %r21 +#endif + add %r26, %r21, %r22 + cmpb,COND(>>),n %r22, %r25, 2f /* predict not taken */ +1: add %r22, %r21, %r22 + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + fdc,m %r23(%r26) + cmpb,COND(<<=) %r22, %r25, 1b /* predict taken */ + fdc,m %r23(%r26) + +2: cmpb,COND(>>),n %r25, %r26, 2b /* predict taken */ fdc,m %r23(%r26) sync @@ -1045,7 +1155,33 @@ ENTRY_CFI(purge_kernel_dcache_range_asm) ldo -1(%r23), %r21 ANDCM %r26, %r21, %r26 -1: cmpb,COND(<<),n %r26, %r25,1b +#ifdef CONFIG_64BIT + depd,z %r23, 59, 60, %r21 +#else + depw,z %r23, 27, 28, %r21 +#endif + add %r26, %r21, %r22 + cmpb,COND(>>),n %r22, %r25, 2f /* predict not taken */ +1: add %r22, %r21, %r22 + pdc,m %r23(%r26) + pdc,m %r23(%r26) + pdc,m %r23(%r26) + pdc,m %r23(%r26) + pdc,m %r23(%r26) + pdc,m %r23(%r26) + pdc,m %r23(%r26) + pdc,m %r23(%r26) + pdc,m %r23(%r26) + pdc,m %r23(%r26) + pdc,m %r23(%r26) + pdc,m %r23(%r26) + pdc,m %r23(%r26) + pdc,m %r23(%r26) + pdc,m %r23(%r26) + cmpb,COND(<<=) %r22, %r25, 1b /* predict taken */ + pdc,m %r23(%r26) + +2: cmpb,COND(>>),n %r25, %r26, 2b /* predict taken */ pdc,m %r23(%r26) sync @@ -1061,7 +1197,33 @@ ENTRY_CFI(flush_user_icache_range_asm) ldo -1(%r23), %r21 ANDCM %r26, %r21, %r26 -1: cmpb,COND(<<),n %r26, %r25,1b +#ifdef CONFIG_64BIT + depd,z %r23, 59, 60, %r21 +#else + depw,z %r23, 27, 28, %r21 +#endif + add %r26, %r21, %r22 + cmpb,COND(>>),n %r22, %r25, 2f /* predict not taken */ +1: add %r22, %r21, %r22 + fic,m %r23(%sr3, %r26) + fic,m %r23(%sr3, %r26) + fic,m %r23(%sr3, %r26) + fic,m %r23(%sr3, %r26) + fic,m %r23(%sr3, %r26) + fic,m %r23(%sr3, %r26) + fic,m %r23(%sr3, %r26) + fic,m %r23(%sr3, %r26) + fic,m %r23(%sr3, %r26) + fic,m %r23(%sr3, %r26) + fic,m %r23(%sr3, %r26) + fic,m %r23(%sr3, %r26) + fic,m %r23(%sr3, %r26) + fic,m %r23(%sr3, %r26) + fic,m %r23(%sr3, %r26) + cmpb,COND(<<=) %r22, %r25, 1b /* predict taken */ + fic,m %r23(%sr3, %r26) + +2: cmpb,COND(>>),n %r25, %r26, 2b fic,m %r23(%sr3, %r26) 89: ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP) @@ -1098,7 +1260,7 @@ ENTRY_CFI(flush_kernel_icache_page) fic,m %r23(%sr4, %r26) fic,m %r23(%sr4, %r26) fic,m %r23(%sr4, %r26) - cmpb,COND(<<) %r26, %r25, 1b + cmpb,COND(>>) %r25, %r26, 1b /* predict taken */ fic,m %r23(%sr4, %r26) 89: ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP) @@ -1113,7 +1275,33 @@ ENTRY_CFI(flush_kernel_icache_range_asm) ldo -1(%r23), %r21 ANDCM %r26, %r21, %r26 -1: cmpb,COND(<<),n %r26, %r25, 1b +#ifdef CONFIG_64BIT + depd,z %r23, 59, 60, %r21 +#else + depw,z %r23, 27, 28, %r21 +#endif + add %r26, %r21, %r22 + cmpb,COND(>>),n %r22, %r25, 2f /* predict not taken */ +1: add %r22, %r21, %r22 + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + fic,m %r23(%sr4, %r26) + cmpb,COND(<<=) %r22, %r25, 1b /* predict taken */ + fic,m %r23(%sr4, %r26) + +2: cmpb,COND(>>),n %r25, %r26, 2b /* predict taken */ fic,m %r23(%sr4, %r26) 89: ALTERNATIVE(88b, 89b, ALT_COND_NO_ICACHE, INSN_NOP) -- cgit v1.2.3-59-g8ed1b