From 1ac832509f2ea1b566f0c06f98f308f58b03d098 Mon Sep 17 00:00:00 2001
From: Vincent Chen
Date: Thu, 22 Nov 2018 11:14:35 +0800
Subject: nds32: Support FP emulation

The Andes FPU coprocessor does not support denormalized number handling.
According to the specification, the FPU generates a denorm input exception
that requires the kernel to handle the instruction operation when it
encounters denormalized operands. Hence an nds32 FPU ISA emulator in the
kernel is required to meet this requirement.

Signed-off-by: Vincent Chen
Signed-off-by: Nickhu
Acked-by: Greentime Hu
Signed-off-by: Greentime Hu
---
 arch/nds32/include/asm/fpu.h            |   1 +
 arch/nds32/include/asm/fpuemu.h         |  32 +++++++
 arch/nds32/include/asm/nds32_fpu_inst.h | 109 ++++++++++++++++++++++
 arch/nds32/include/asm/sfp-machine.h    | 158 ++++++++++++++++++++++++++++++++
 4 files changed, 300 insertions(+)
 create mode 100644 arch/nds32/include/asm/fpuemu.h
 create mode 100644 arch/nds32/include/asm/nds32_fpu_inst.h
 create mode 100644 arch/nds32/include/asm/sfp-machine.h

diff --git a/arch/nds32/include/asm/fpu.h b/arch/nds32/include/asm/fpu.h
index f7a7f6b2ea8f..9b1107b58e23 100644
--- a/arch/nds32/include/asm/fpu.h
+++ b/arch/nds32/include/asm/fpu.h
@@ -15,6 +15,7 @@ extern bool has_fpu;
 extern void save_fpu(struct task_struct *__tsk);
 extern void load_fpu(const struct fpu_struct *fpregs);
 extern bool do_fpu_exception(unsigned int subtype, struct pt_regs *regs);
+extern int do_fpuemu(struct pt_regs *regs, struct fpu_struct *fpu);
 
 #define test_tsk_fpu(regs)	(regs->fucop_ctl & FUCOP_CTL_mskCP0EN)
diff --git a/arch/nds32/include/asm/fpuemu.h b/arch/nds32/include/asm/fpuemu.h
new file mode 100644
index 000000000000..c4bd0c7faa75
--- /dev/null
+++ b/arch/nds32/include/asm/fpuemu.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2005-2018 Andes Technology Corporation */
+
+#ifndef __ARCH_NDS32_FPUEMU_H
+#define __ARCH_NDS32_FPUEMU_H
+
+/*
+ * single precision
+ */
+
+void fadds(void *ft, void *fa, void *fb);
+void fsubs(void *ft, void *fa, void *fb);
+void fmuls(void *ft, void *fa, void *fb);
+void fdivs(void *ft, void *fa, void *fb);
+void fs2d(void *ft, void *fa);
+void fsqrts(void *ft, void *fa);
+void fnegs(void *ft, void *fa);
+int fcmps(void *ft, void *fa, void *fb, int cop);
+
+/*
+ * double precision
+ */
+void faddd(void *ft, void *fa, void *fb);
+void fsubd(void *ft, void *fa, void *fb);
+void fmuld(void *ft, void *fa, void *fb);
+void fdivd(void *ft, void *fa, void *fb);
+void fsqrtd(void *ft, void *fa);
+void fd2s(void *ft, void *fa);
+void fnegd(void *ft, void *fa);
+int fcmpd(void *ft, void *fa, void *fb, int cop);
+
+#endif /* __ARCH_NDS32_FPUEMU_H */
diff --git a/arch/nds32/include/asm/nds32_fpu_inst.h b/arch/nds32/include/asm/nds32_fpu_inst.h
new file mode 100644
index 000000000000..1e4b86a90a48
--- /dev/null
+++ b/arch/nds32/include/asm/nds32_fpu_inst.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2005-2018 Andes Technology Corporation */
+
+#ifndef __NDS32_FPU_INST_H
+#define __NDS32_FPU_INST_H
+
+#define cop0_op 0x35
+
+/*
+ * COP0 field of opcodes.
+ */
+#define fs1_op 0x0
+#define fs2_op 0x4
+#define fd1_op 0x8
+#define fd2_op 0xc
+
+/*
+ * FS1 opcode.
+ */
+enum fs1 {
+	fadds_op, fsubs_op, fcpynss_op, fcpyss_op,
+	fmadds_op, fmsubs_op, fcmovns_op, fcmovzs_op,
+	fnmadds_op, fnmsubs_op,
+	fmuls_op = 0xc, fdivs_op,
+	fs1_f2op_op = 0xf
+};
+
+/*
+ * FS1/F2OP opcode.
+ */ +enum fs1_f2 { + fs2d_op, fsqrts_op, + fui2s_op = 0x8, fsi2s_op = 0xc, + fs2ui_op = 0x10, fs2ui_z_op = 0x14, + fs2si_op = 0x18, fs2si_z_op = 0x1c +}; + +/* + * FS2 opcode. + */ +enum fs2 { + fcmpeqs_op, fcmpeqs_e_op, fcmplts_op, fcmplts_e_op, + fcmples_op, fcmples_e_op, fcmpuns_op, fcmpuns_e_op +}; + +/* + * FD1 opcode. + */ +enum fd1 { + faddd_op, fsubd_op, fcpynsd_op, fcpysd_op, + fmaddd_op, fmsubd_op, fcmovnd_op, fcmovzd_op, + fnmaddd_op, fnmsubd_op, + fmuld_op = 0xc, fdivd_op, fd1_f2op_op = 0xf +}; + +/* + * FD1/F2OP opcode. + */ +enum fd1_f2 { + fd2s_op, fsqrtd_op, + fui2d_op = 0x8, fsi2d_op = 0xc, + fd2ui_op = 0x10, fd2ui_z_op = 0x14, + fd2si_op = 0x18, fd2si_z_op = 0x1c +}; + +/* + * FD2 opcode. + */ +enum fd2 { + fcmpeqd_op, fcmpeqd_e_op, fcmpltd_op, fcmpltd_e_op, + fcmpled_op, fcmpled_e_op, fcmpund_op, fcmpund_e_op +}; + +#define NDS32Insn(x) x + +#define I_OPCODE_off 25 +#define NDS32Insn_OPCODE(x) (NDS32Insn(x) >> I_OPCODE_off) + +#define I_OPCODE_offRt 20 +#define I_OPCODE_mskRt (0x1fUL << I_OPCODE_offRt) +#define NDS32Insn_OPCODE_Rt(x) \ + ((NDS32Insn(x) & I_OPCODE_mskRt) >> I_OPCODE_offRt) + +#define I_OPCODE_offRa 15 +#define I_OPCODE_mskRa (0x1fUL << I_OPCODE_offRa) +#define NDS32Insn_OPCODE_Ra(x) \ + ((NDS32Insn(x) & I_OPCODE_mskRa) >> I_OPCODE_offRa) + +#define I_OPCODE_offRb 10 +#define I_OPCODE_mskRb (0x1fUL << I_OPCODE_offRb) +#define NDS32Insn_OPCODE_Rb(x) \ + ((NDS32Insn(x) & I_OPCODE_mskRb) >> I_OPCODE_offRb) + +#define I_OPCODE_offbit1014 10 +#define I_OPCODE_mskbit1014 (0x1fUL << I_OPCODE_offbit1014) +#define NDS32Insn_OPCODE_BIT1014(x) \ + ((NDS32Insn(x) & I_OPCODE_mskbit1014) >> I_OPCODE_offbit1014) + +#define I_OPCODE_offbit69 6 +#define I_OPCODE_mskbit69 (0xfUL << I_OPCODE_offbit69) +#define NDS32Insn_OPCODE_BIT69(x) \ + ((NDS32Insn(x) & I_OPCODE_mskbit69) >> I_OPCODE_offbit69) + +#define I_OPCODE_offCOP0 0 +#define I_OPCODE_mskCOP0 (0x3fUL << I_OPCODE_offCOP0) +#define NDS32Insn_OPCODE_COP0(x) \ + ((NDS32Insn(x) & I_OPCODE_mskCOP0) >> I_OPCODE_offCOP0) + +#endif /* __NDS32_FPU_INST_H */ diff --git a/arch/nds32/include/asm/sfp-machine.h b/arch/nds32/include/asm/sfp-machine.h new file mode 100644 index 000000000000..b1a5caa332b5 --- /dev/null +++ b/arch/nds32/include/asm/sfp-machine.h @@ -0,0 +1,158 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2005-2018 Andes Technology Corporation */ + +#include + +#define _FP_W_TYPE_SIZE 32 +#define _FP_W_TYPE unsigned long +#define _FP_WS_TYPE signed long +#define _FP_I_TYPE long + +#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) +#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) +#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) + +#define _FP_MUL_MEAT_S(R, X, Y) \ + _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S, R, X, Y, umul_ppmm) +#define _FP_MUL_MEAT_D(R, X, Y) \ + _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D, R, X, Y, umul_ppmm) +#define _FP_MUL_MEAT_Q(R, X, Y) \ + _FP_MUL_MEAT_4_wide(_FP_WFRACBITS_Q, R, X, Y, umul_ppmm) + +#define _FP_MUL_MEAT_DW_S(R, X, Y) \ + _FP_MUL_MEAT_DW_1_wide(_FP_WFRACBITS_S, R, X, Y, umul_ppmm) +#define _FP_MUL_MEAT_DW_D(R, X, Y) \ + _FP_MUL_MEAT_DW_2_wide(_FP_WFRACBITS_D, R, X, Y, umul_ppmm) + +#define _FP_DIV_MEAT_S(R, X, Y) _FP_DIV_MEAT_1_udiv_norm(S, R, X, Y) +#define _FP_DIV_MEAT_D(R, X, Y) _FP_DIV_MEAT_2_udiv(D, R, X, Y) + +#define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) +#define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1 +#define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1 +#define _FP_NANSIGN_S 0 +#define _FP_NANSIGN_D 0 +#define _FP_NANSIGN_Q 0 + 
+#define _FP_KEEPNANFRACP 1
+#define _FP_QNANNEGATEDP 0
+
+#define _FP_CHOOSENAN(fs, wc, R, X, Y, OP)			\
+do {								\
+	if ((_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs)	\
+	    && !(_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs)) { \
+		R##_s = Y##_s;					\
+		_FP_FRAC_COPY_##wc(R, Y);			\
+	} else {						\
+		R##_s = X##_s;					\
+		_FP_FRAC_COPY_##wc(R, X);			\
+	}							\
+	R##_c = FP_CLS_NAN;					\
+} while (0)
+
+#define __FPU_FPCSR	(current->thread.fpu.fpcsr)
+
+/* Obtain the current rounding mode. */
+#define FP_ROUNDMODE			\
+({					\
+	__FPU_FPCSR & FPCSR_mskRM;	\
+})
+
+#define FP_RND_NEAREST	0
+#define FP_RND_PINF	1
+#define FP_RND_MINF	2
+#define FP_RND_ZERO	3
+
+#define FP_EX_INVALID	FPCSR_mskIVO
+#define FP_EX_DIVZERO	FPCSR_mskDBZ
+#define FP_EX_OVERFLOW	FPCSR_mskOVF
+#define FP_EX_UNDERFLOW	FPCSR_mskUDF
+#define FP_EX_INEXACT	FPCSR_mskIEX
+
+#define SF_CEQ	2
+#define SF_CLT	1
+#define SF_CGT	3
+#define SF_CUN	4
+
+#include
+
+#ifdef __BIG_ENDIAN__
+#define __BYTE_ORDER __BIG_ENDIAN
+#define __LITTLE_ENDIAN 0
+#else
+#define __BYTE_ORDER __LITTLE_ENDIAN
+#define __BIG_ENDIAN 0
+#endif
+
+#define abort() do { } while (0)
+#define umul_ppmm(w1, w0, u, v)					\
+do {								\
+	UWtype __x0, __x1, __x2, __x3;				\
+	UHWtype __ul, __vl, __uh, __vh;				\
+								\
+	__ul = __ll_lowpart(u);					\
+	__uh = __ll_highpart(u);				\
+	__vl = __ll_lowpart(v);					\
+	__vh = __ll_highpart(v);				\
+								\
+	__x0 = (UWtype) __ul * __vl;				\
+	__x1 = (UWtype) __ul * __vh;				\
+	__x2 = (UWtype) __uh * __vl;				\
+	__x3 = (UWtype) __uh * __vh;				\
+								\
+	__x1 += __ll_highpart(__x0);				\
+	__x1 += __x2;						\
+	if (__x1 < __x2)					\
+		__x3 += __ll_B;					\
+								\
+	(w1) = __x3 + __ll_highpart(__x1);			\
+	(w0) = __ll_lowpart(__x1) * __ll_B + __ll_lowpart(__x0); \
+} while (0)
+
+#define add_ssaaaa(sh, sl, ah, al, bh, bl)			\
+do {								\
+	UWtype __x;						\
+	__x = (al) + (bl);					\
+	(sh) = (ah) + (bh) + (__x < (al));			\
+	(sl) = __x;						\
+} while (0)
+
+#define sub_ddmmss(sh, sl, ah, al, bh, bl)			\
+do {								\
+	UWtype __x;						\
+	__x = (al) - (bl);					\
+	(sh) = (ah) - (bh) - (__x > (al));			\
+	(sl) = __x;						\
+} while (0)
+
+#define udiv_qrnnd(q, r, n1, n0, d)				\
+do {								\
+	UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m;		\
+	__d1 = __ll_highpart(d);				\
+	__d0 = __ll_lowpart(d);					\
+								\
+	__r1 = (n1) % __d1;					\
+	__q1 = (n1) / __d1;					\
+	__m = (UWtype) __q1 * __d0;				\
+	__r1 = __r1 * __ll_B | __ll_highpart(n0);		\
+	if (__r1 < __m) {					\
+		__q1--, __r1 += (d);				\
+		if (__r1 >= (d))				\
+			if (__r1 < __m)				\
+				__q1--, __r1 += (d);		\
+	}							\
+	__r1 -= __m;						\
+	__r0 = __r1 % __d1;					\
+	__q0 = __r1 / __d1;					\
+	__m = (UWtype) __q0 * __d0;				\
+	__r0 = __r0 * __ll_B | __ll_lowpart(n0);		\
+	if (__r0 < __m) {					\
+		__q0--, __r0 += (d);				\
+		if (__r0 >= (d))				\
+			if (__r0 < __m)				\
+				__q0--, __r0 += (d);		\
+	}							\
+	__r0 -= __m;						\
+	(q) = (UWtype) __q1 * __ll_B | __q0;			\
+	(r) = __r0;						\
+} while (0)
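
Illustrative note, not part of the patch above: the NDS32Insn_OPCODE_* accessors
in nds32_fpu_inst.h extract the register and sub-opcode fields of a 32-bit COP0
FPU instruction word, and the helpers declared in fpuemu.h perform the arithmetic
in software. The sketch below shows one way the two headers could be combined to
emulate a few single-precision FS1 operations. The emulate_fs1() name, the fpr[]
register array, and the choice of the BIT69 field as the FS1 sub-opcode selector
are assumptions made only for this example; the real entry point is the
do_fpuemu() declared in fpu.h, whose implementation is not part of this diff.

/*
 * Sketch only: decode a COP0 FS1-group instruction and emulate a few
 * single-precision operations with the fpuemu.h helpers.  'fpr' stands in
 * for a 32-entry single-precision FPU register file (Rt/Ra/Rb are 5-bit
 * fields, so indices range from 0 to 31).
 */
#include <asm/nds32_fpu_inst.h>
#include <asm/fpuemu.h>

static int emulate_fs1(unsigned long insn, unsigned int *fpr)
{
	void *ft = &fpr[NDS32Insn_OPCODE_Rt(insn)];
	void *fa = &fpr[NDS32Insn_OPCODE_Ra(insn)];
	void *fb = &fpr[NDS32Insn_OPCODE_Rb(insn)];

	/* Only handle COP0 instructions in the single-precision FS1 group. */
	if (NDS32Insn_OPCODE(insn) != cop0_op ||
	    NDS32Insn_OPCODE_COP0(insn) != fs1_op)
		return -1;

	switch (NDS32Insn_OPCODE_BIT69(insn)) {	/* assumed FS1 sub-opcode field */
	case fadds_op:
		fadds(ft, fa, fb);
		break;
	case fsubs_op:
		fsubs(ft, fa, fb);
		break;
	case fmuls_op:
		fmuls(ft, fa, fb);
		break;
	case fdivs_op:
		fdivs(ft, fa, fb);
		break;
	default:
		return -1;	/* other encodings left to the real emulator */
	}
	return 0;
}

The FD1, FS2 and FD2 groups would follow the same pattern: switch on
NDS32Insn_OPCODE_COP0() first, then dispatch to the matching double-precision
or compare helpers from fpuemu.h.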