/*
 * Copyright 2015, Cyril Bur, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include "../basic_asm.h"

# Save the non-volatile VMX registers v20-v31 into the current stack frame,
# 16 bytes apart starting at offset 'pos' from sp.  'reg' is clobbered (it
# is used as the running offset).
# POS MUST BE 16 ALIGNED!
#define PUSH_VMX(pos,reg) \
	li	reg,pos; \
	stvx	v20,reg,sp; \
	addi	reg,reg,16; \
	stvx	v21,reg,sp; \
	addi	reg,reg,16; \
	stvx	v22,reg,sp; \
	addi	reg,reg,16; \
	stvx	v23,reg,sp; \
	addi	reg,reg,16; \
	stvx	v24,reg,sp; \
	addi	reg,reg,16; \
	stvx	v25,reg,sp; \
	addi	reg,reg,16; \
	stvx	v26,reg,sp; \
	addi	reg,reg,16; \
	stvx	v27,reg,sp; \
	addi	reg,reg,16; \
	stvx	v28,reg,sp; \
	addi	reg,reg,16; \
	stvx	v29,reg,sp; \
	addi	reg,reg,16; \
	stvx	v30,reg,sp; \
	addi	reg,reg,16; \
	stvx	v31,reg,sp;

# Restore v20-v31 from the stack frame area written by PUSH_VMX.  'reg' is
# clobbered, exactly as in PUSH_VMX.
# POS MUST BE 16 ALIGNED!
#define POP_VMX(pos,reg) \
	li	reg,pos; \
	lvx	v20,reg,sp; \
	addi	reg,reg,16; \
	lvx	v21,reg,sp; \
	addi	reg,reg,16; \
	lvx	v22,reg,sp; \
	addi	reg,reg,16; \
	lvx	v23,reg,sp; \
	addi	reg,reg,16; \
	lvx	v24,reg,sp; \
	addi	reg,reg,16; \
	lvx	v25,reg,sp; \
	addi	reg,reg,16; \
	lvx	v26,reg,sp; \
	addi	reg,reg,16; \
	lvx	v27,reg,sp; \
	addi	reg,reg,16; \
	lvx	v28,reg,sp; \
	addi	reg,reg,16; \
	lvx	v29,reg,sp; \
	addi	reg,reg,16; \
	lvx	v30,reg,sp; \
	addi	reg,reg,16; \
	lvx	v31,reg,sp;

# Load v20-v31 from the twelve 16-byte vectors at the address in r3.
# Careful this will 'clobber' vmx (by design)
# Don't call this from C
FUNC_START(load_vmx)
	li	r5,0
	lvx	v20,r5,r3
	addi	r5,r5,16
	lvx	v21,r5,r3
	addi	r5,r5,16
	lvx	v22,r5,r3
	addi	r5,r5,16
	lvx	v23,r5,r3
	addi	r5,r5,16
	lvx	v24,r5,r3
	addi	r5,r5,16
	lvx	v25,r5,r3
	addi	r5,r5,16
	lvx	v26,r5,r3
	addi	r5,r5,16
	lvx	v27,r5,r3
	addi	r5,r5,16
	lvx	v28,r5,r3
	addi	r5,r5,16
	lvx	v29,r5,r3
	addi	r5,r5,16
	lvx	v30,r5,r3
	addi	r5,r5,16
	lvx	v31,r5,r3
	blr
FUNC_END(load_vmx)

# Compare v20-v31 against the twelve 16-byte vectors at the address in r3.
# Returns in r3: 0 if every register matches the array, 1 otherwise.
# Should be safe from C, only touches r4, r5 and v0,v1,v2
# NOTE(review): vcmpequd is an ISA 2.07 (POWER8) instruction — confirm the
# test is gated on the matching CPU feature by its caller/harness.
FUNC_START(check_vmx)
	PUSH_BASIC_STACK(32)
	mr	r4,r3
	li	r3,1 # assume a bad result
	li	r5,0
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v20	# per-doubleword all-ones mask on equality
	vmr	v2,v1			# v2 accumulates the AND of all comparison masks
	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v21
	vand	v2,v2,v1
	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v22
	vand	v2,v2,v1
	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v23
	vand	v2,v2,v1
	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v24
	vand	v2,v2,v1
	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v25
	vand	v2,v2,v1
	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v26
	vand	v2,v2,v1
	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v27
	vand	v2,v2,v1
	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v28
	vand	v2,v2,v1
	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v29
	vand	v2,v2,v1
	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v30
	vand	v2,v2,v1
	addi	r5,r5,16
	lvx	v0,r5,r4
	vcmpequd.	v1,v0,v31
	vand	v2,v2,v1

	# Spill the accumulated mask and test it for all-ones.
	# NOTE(review): only ONE 64-bit doubleword of the 128-bit v2 mask is
	# loaded and checked here; a mismatch confined to the other half of a
	# vector would go undetected — confirm this is intentional (vcmpequd.
	# also sets CR6, which could check the whole vector).
	li	r5,STACK_FRAME_LOCAL(0,0)
	stvx	v2,r5,sp
	ldx	r0,r5,sp
	cmpdi	r0,0xffffffffffffffff	# compare against -1 (all bits set)
	bne	1f
	li	r3,0
1:	POP_BASIC_STACK(32)
	blr
FUNC_END(check_vmx)

# Load v20-v31 from *varray (r3), fork() via a raw syscall (so no C code
# can disturb the vector state around the fork), then verify the registers
# survived.  Returns check_vmx's result in r3 (0 == registers intact); the
# raw fork return value is stored through the pointer passed in r4.
# Safe from C
FUNC_START(test_vmx)
	# r3 holds pointer to varray (saved as PARAM 0 below)
	# r4 holds pointer to where to put the result of fork (PARAM 1)
	# v20-v31 are non-volatile
	PUSH_BASIC_STACK(512)
	std	r3,STACK_FRAME_PARAM(0)(sp) # Address of varray
	std	r4,STACK_FRAME_PARAM(1)(sp) # address of pid
	PUSH_VMX(STACK_FRAME_LOCAL(2,0),r4)	# r4 clobbered — already saved above

	bl load_vmx
	nop

	li	r0,__NR_fork
	sc
	# Pass the result of fork back to the caller
	ld	r9,STACK_FRAME_PARAM(1)(sp)
	std	r3,0(r9)

	ld	r3,STACK_FRAME_PARAM(0)(sp)
	bl check_vmx
	nop

	POP_VMX(STACK_FRAME_LOCAL(2,0),r4)

	POP_BASIC_STACK(512)
	blr
FUNC_END(test_vmx)

# int preempt_vmx(vector int *varray, int *threads_starting, int *running)
# On starting will (atomically) decrement threads_starting as a signal that
# the VMX have been loaded with varray. Will proceed to check the validity of
# the VMX registers while running is not zero.
# Preemption stress check: loads v20-v31 from *varray, atomically decrements
# *threads_starting to signal readiness, then repeatedly re-verifies the
# registers until *running becomes zero.  Returns in r3: 0 if the registers
# stayed valid for the whole run, non-zero (check_vmx's failure value) as
# soon as corruption is detected.
FUNC_START(preempt_vmx)
	PUSH_BASIC_STACK(512)
	std	r3,STACK_FRAME_PARAM(0)(sp) # vector int *varray
	std	r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting
	std	r5,STACK_FRAME_PARAM(2)(sp) # int *running
	# VMX need to write to 16 byte aligned addresses, skip STACK_FRAME_LOCAL(3,0)
	PUSH_VMX(STACK_FRAME_LOCAL(4,0),r4)
	bl load_vmx
	nop

	sync	# order the VMX loads before publishing the ready signal
	# Atomic DEC of *threads_starting via a lwarx/stwcx. retry loop
	ld	r3,STACK_FRAME_PARAM(1)(sp)
1:	lwarx	r4,0,r3
	addi	r4,r4,-1
	stwcx.	r4,0,r3
	bne-	1b	# reservation lost — retry

	# Re-check the registers until told to stop or a mismatch appears.
2:	ld	r3,STACK_FRAME_PARAM(0)(sp)
	bl check_vmx
	nop
	cmpdi	r3,0
	bne	3f	# corruption detected — return check_vmx's non-zero result
	ld	r4,STACK_FRAME_PARAM(2)(sp)
	# NOTE(review): 'ld' loads 8 bytes through an 'int *' (running); only
	# the low word is compared below, but the load itself reads past the
	# int — confirm the caller's object makes this access safe.
	ld	r5,0(r4)
	cmpwi	r5,0
	bne	2b	# still running — loop and check again

3:	POP_VMX(STACK_FRAME_LOCAL(4,0),r4)
	POP_BASIC_STACK(512)
	blr
FUNC_END(preempt_vmx)