// SPDX-License-Identifier: GPL-2.0-only
// Copyright (C) 2021 ARM Limited.
// Original author: Mark Brown
//
// Scalable Matrix Extension ZA context switch test
// Repeatedly writes unique test patterns into each ZA tile
// and reads them back to verify integrity.
//
// for x in `seq 1 NR_CPUS`; do sve-test & pids=$pids\ $! ; done
// (leave it running for as long as you want...)
// kill $pids

#include <asm/unistd.h>		// __NR_* syscall numbers used below
#include "assembler.h"
#include "asm-offsets.h"
#include "sme-inst.h"

.arch_extension sve

#define MAXVL     2048
#define MAXVL_B   (MAXVL / 8)

// Declare some storage space to shadow ZA register contents and a
// scratch buffer for a vector.
//
// zaref holds MAXVL_B rows of MAXVL_B bytes each (one row per ZA
// horizontal vector at the maximum vector length); scratch holds a single
// maximum-length vector.  The storage is emitted into .data; .popsection
// then returns to the section that was active before .pushsection.
.pushsection .text
.data
.align 4
zaref:
	.space	MAXVL_B * MAXVL_B
scratch:
	.space	MAXVL_B
.popsection

// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
// A length of zero copies nothing.
// Clobbers x0-x3
function memcpy
	cbz	x2, 1f			// nothing to do for an empty copy

0:	ldrb	w3, [x1], #1		// byte-at-a-time, post-incrementing
	strb	w3, [x0], #1
	subs	x2, x2, #1
	b.ne	0b

1:	ret
endfunction

// Generate a test pattern for storage in ZA
// x0: pid
// x1: row in ZA
// x2: generation
//
// These values are used to construct a 32-bit pattern that is repeated in the
// scratch buffer as many times as will fit:
// bits 31:28	generation number (increments once per test_loop)
// bits 27:16	pid
// bits 15: 8	row number
// bits  7: 0	32-bit lane index
//
// The whole scratch buffer (MAXVL_B bytes) is always filled, whatever the
// current vector length is.
// Clobbers x0, x1 and x3.
function pattern
	mov	w3, wzr
	bfi	w3, w0, #16, #12	// PID
	bfi	w3, w1, #8, #8		// Row
	bfi	w3, w2, #28, #4		// Generation

	ldr	x0, =scratch
	mov	w1, #MAXVL_B / 4	// number of 32-bit words in scratch

0:	str	w3, [x0], #4
	add	w3, w3, #1		// Lane
	subs	w1, w1, #1
	b.ne	0b

	ret
endfunction

// Get the address of shadow data for ZA horizontal vector xn
//
// xd:    destination register, receives zaref + xn * (vector length in bytes)
// xn:    register holding the row number
// nrtmp: bare register *number* (no "x" prefix); on exit x<nrtmp> holds the
//        value read by "rdsvl <nrtmp>, 1", i.e. the row stride used here
.macro _adrza xd, xn, nrtmp
	ldr	\xd, =zaref
	rdsvl	\nrtmp, 1
	madd	\xd, x\nrtmp, \xn, \xd
.endm

// Set up test pattern in a ZA horizontal vector
// x0: pid
// x1: row number
// x2: generation
//
// The pattern is written both to the shadow copy in zaref and to the ZA
// vector itself.
// Clobbers x0-x5 and x12 (the return address is kept in x4 across the
// nested calls).
function setup_za
	mov	x4, x30			// save return address; we call out below
	mov	x12, x1			// Use x12 for vector select

	bl	pattern			// Get pattern in scratch buffer
	_adrza	x0, x12, 2		// Shadow buffer pointer to x0 and x5
	mov	x5, x0
	ldr	x1, =scratch
	bl	memcpy			// length set up in x2 by _adrza

	_ldr_za 12, 5			// load vector w12 from pointer x5

	ret	x4
endfunction

// Trivial memory compare: compare x2 bytes starting at address x0 with
// bytes starting at address x1.
// Returns only if all bytes match; otherwise, the program is aborted.
// On a mismatch barf is entered with the original x0, x1 and x2 restored.
// Clobbers x0-x5.
function memcmp
	cbz	x2, 2f

	stp	x0, x1, [sp, #-0x20]!	// keep the arguments for barf
	str	x2, [sp, #0x10]

	mov	x5, #0			// byte index
0:	ldrb	w3, [x0, x5]
	ldrb	w4, [x1, x5]
	add	x5, x5, #1
	cmp	w3, w4
	b.ne	1f
	subs	x2, x2, #1
	b.ne	0b

	// The loads below do not alter the flags: NE still means "mismatch",
	// EQ means the loop ran to completion.
1:	ldr	x2, [sp, #0x10]
	ldp	x0, x1, [sp], #0x20
	b.ne	barf

2:	ret
endfunction

// Verify that a ZA vector matches its shadow in memory, else abort
// x0: row number
// Clobbers x0-x7 and x12.
function check_za
	mov	x3, x30			// save return address

	mov	x12, x0			// vector select for _str_za
	_adrza	x5, x0, 6		// pointer to expected value in x5
	mov	x4, x0			// copy of the row number
	ldr	x7, =scratch		// x7 is scratch

	mov	x0, x7			// Poison scratch
	mov	x1, x6			// length left in x6 by _adrza
	bl	memfill_ae

	_str_za 12, 7			// save vector w12 to pointer x7

	mov	x0, x5			// expected: shadow copy
	mov	x1, x7			// actual: what ZA contained
	mov	x2, x6
	mov	x30, x3			// tail call: memcmp returns to our caller
	b	memcmp
endfunction

// Any SME register modified here can cause corruption in the main
// thread -- but *only* the locations modified here.
function irritator_handler
	// x2: ucontext pointer (handlers are installed with SA_SIGINFO)
	// Increment the irritation signal count (x23):
	ldr	x0, [x2, #ucontext_regs + 8 * 23]
	add	x0, x0, #1
	str	x0, [x2, #ucontext_regs + 8 * 23]

	// Corrupt some random ZA data
#if 0
	adr	x0, .text + (irritator_handler - .text) / 16 * 16
	movi	v0.8b, #1
	movi	v9.16b, #2
	movi	v31.8b, #3
#endif

	ret
endfunction

// Signal handler that only counts the signal.
// x2: ucontext pointer (handlers are installed with SA_SIGINFO)
function tickle_handler
	// Increment the signal count (x23):
	ldr	x0, [x2, #ucontext_regs + 8 * 23]
	add	x0, x0, #1
	str	x0, [x2, #ucontext_regs + 8 * 23]

	ret
endfunction

// Signal handler for a requested shutdown: prints the signal number plus
// the interrupted thread's iteration (x22) and signal (x23) counters as
// saved in the ucontext, then exits with status 0.  Does not return.
// w0: signal number
// x2: ucontext pointer
function terminate_handler
	mov	w21, w0			// keep arguments across the print helpers
	mov	x20, x2

	puts	"Terminated by signal "
	mov	w0, w21
	bl	putdec
	puts	", no error, iterations="
	ldr	x0, [x20, #ucontext_regs + 8 * 22]
	bl	putdec
	puts	", signals="
	ldr	x0, [x20, #ucontext_regs + 8 * 23]
	bl	putdecn

	mov	x0, #0
	mov	x8, #__NR_exit
	svc	#0
endfunction

// Install a signal handler via rt_sigaction; aborts the program on failure.
// w0: signal number
// x1: sa_action
// w2: sa_flags
// Clobbers x0-x6,x8
//
// Stack frame: [sp] holds the saved x30, [sp + 16] holds the sigaction
// structure (sa_sz bytes, rounded up to keep sp 16-byte aligned).
function setsignal
	str	x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!

	mov	w4, w0			// stash arguments across memclr
	mov	x5, x1
	mov	w6, w2

	add	x0, sp, #16		// zero the sigaction structure
	mov	x1, #sa_sz
	bl	memclr

	mov	w0, w4
	add	x1, sp, #16
	str	w6, [x1, #sa_flags]
	str	x5, [x1, #sa_handler]
	mov	x2, #0			// old action is not wanted
	mov	x3, #sa_mask_sz
	mov	x8, #__NR_rt_sigaction
	svc	#0

	cbz	w0, 1f

	puts	"sigaction failure\n"
	b	.Labort

1:	ldr	x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
	ret
endfunction

// Main program entry point
//
// Register usage for the lifetime of the program:
//   x19: vector length in bits, sampled once at startup
//   x20: our PID
//   x21: loop down-counter (rows left to set up / verify)
//   x22: generation number (completed iterations)
//   x23: signal count, incremented by the signal handlers via the ucontext
//   x24: number of rows during verification; current row is x24 - x21
// (the label itself is defined by the "function" macro)
.globl _start
function _start
	mov	x23, #0		// signal count

	mov	w0, #SIGINT
	adr	x1, terminate_handler
	mov	w2, #SA_SIGINFO
	bl	setsignal

	mov	w0, #SIGTERM
	adr	x1, terminate_handler
	mov	w2, #SA_SIGINFO
	bl	setsignal

	mov	w0, #SIGUSR1
	adr	x1, irritator_handler
	mov	w2, #SA_SIGINFO
	orr	w2, w2, #SA_NODEFER
	bl	setsignal

	mov	w0, #SIGUSR2
	adr	x1, tickle_handler
	mov	w2, #SA_SIGINFO
	orr	w2, w2, #SA_NODEFER
	bl	setsignal

	puts	"Streaming mode "	// prefix for the "vector length" line
	smstart_za

	// Sanity-check and report the vector length

	rdsvl	19, 8			// x19 = vector length in bits
	cmp	x19, #128
	b.lo	1f
	cmp	x19, #2048
	b.hi	1f
	tst	x19, #(8 - 1)
	b.eq	2f

1:	puts	"bad vector length: "
	mov	x0, x19
	bl	putdecn
	b	.Labort

2:	puts	"vector length:\t"
	mov	x0, x19
	bl	putdec
	puts	" bits\n"

	// Obtain our PID, to ensure test pattern uniqueness between processes
	mov	x8, #__NR_getpid
	svc	#0
	mov	x20, x0

	puts	"PID:\t"
	mov	x0, x20
	bl	putdecn

	mov	x22, #0		// generation number, increments per iteration
.Ltest_loop:
	rdsvl	0, 8			// the vector length must never change
	cmp	x0, x19
	b.ne	vl_barf

	rdsvl	21, 1		// Set up ZA & shadow with test pattern
0:	mov	x0, x20
	sub	x1, x21, #1		// rows are filled from last to first
	mov	x2, x22
	bl	setup_za
	subs	x21, x21, #1
	b.ne	0b

	mov	x8, #__NR_sched_yield	// encourage preemption
	svc	#0

	mrs	x0, S3_3_C4_C2_2	// SVCR should have ZA=1,SM=0
	and	x1, x0, #3
	cmp	x1, #2
	b.ne	svcr_barf

	rdsvl	21, 1			// Verify that the data made it through
	rdsvl	24, 1			// row count; current row is x24 - x21
0:	sub	x0, x24, x21
	bl	check_za
	subs	x21, x21, #1
	b.ne	0b

	add	x22, x22, #1	// Everything still working
	b	.Ltest_loop

	// Fatal setup error: raise SIGABRT against ourselves.  The PID is
	// looked up first because kill(0, ...) would signal every process in
	// our process group, not just this one.
.Labort:
	mov	x8, #__NR_getpid
	svc	#0
	mov	x1, #SIGABRT
	mov	x8, #__NR_kill
	svc	#0
endfunction

// Report a data mismatch and abort the process with SIGABRT.
// x0: pointer to expected data
// x1: pointer to actual data
// x2: data size in bytes
// Also reads x20 (PID), x22 (iteration) and x21/x24 (verification loop
// state) as maintained by _start.
function barf
// fpsimd.c activity log dump hack
//	ldr	w0, =0xdeadc0de
//	mov	w8, #__NR_exit
//	svc	#0
// end hack
	smstop
	mov	x10, x0	// expected data
	mov	x11, x1	// actual data
	mov	x12, x2	// data size

	puts	"Mismatch: PID="
	mov	x0, x20
	bl	putdec
	puts	", iteration="
	mov	x0, x22
	bl	putdec
	puts	", row="
	sub	x0, x24, x21	// row being verified (x21 is a down-counter)
	bl	putdecn
	puts	"\tExpected ["
	mov	x0, x10
	mov	x1, x12
	bl	dumphex
	puts	"]\n\tGot ["
	mov	x0, x11
	mov	x1, x12
	bl	dumphex
	puts	"]\n"

	mov	x8, #__NR_getpid	// x0 = our PID, the target for kill below
	svc	#0
// fpsimd.c activity log dump hack
//	ldr	w0, =0xdeadc0de
//	mov	w8, #__NR_exit
//	svc	#0
// ^ end of hack
	mov	x1, #SIGABRT
	mov	x8, #__NR_kill
	svc	#0
//	mov	x8, #__NR_exit
//	mov	x1, #1
//	svc	#0
endfunction

// Report an unexpected change of the active vector length and exit with
// status 1.
// x0: the vector length that was read
function vl_barf
	mov	x10, x0

	puts	"Bad active VL: "
	mov	x0, x10
	bl	putdecn

	mov	x0, #1		// exit status is the first syscall argument
	mov	x8, #__NR_exit
	svc	#0
endfunction

// Report an unexpected SVCR value and exit with status 1.
// x0: the SVCR value that was read
function svcr_barf
	mov	x10, x0

	puts	"Bad SVCR: "
	mov	x0, x10
	bl	putdecn

	mov	x0, #1		// exit status is the first syscall argument
	mov	x8, #__NR_exit
	svc	#0
endfunction