#ifndef XTENSA_COREASM_H
#define XTENSA_COREASM_H

/*
 * THIS FILE IS GENERATED -- DO NOT MODIFY BY HAND
 *
 * include/asm-xtensa/xtensa/coreasm.h -- assembler-specific
 * definitions that depend on CORE configuration.
 *
 * Source for configuration-independent binaries (which link in a
 * configuration-specific HAL library) must NEVER include this file.
 * It is perfectly normal, however, for the HAL itself to include this
 * file.
 *
 * This file must NOT include xtensa/config/system.h.  Any assembler
 * header file that depends on system information should likely go in
 * a new systemasm.h (or sysasm.h) header file.
 *
 *  NOTE: macro beqi32 is NOT configuration-dependent, and is placed
 *        here til we will have configuration-independent header file.
 *
 * This file is subject to the terms and conditions of the GNU General
 * Public License.  See the file "COPYING" in the main directory of
 * this archive for more details.
 *
 * Copyright (C) 2002 Tensilica Inc.
 */


#include <xtensa/config/core.h>
#include <xtensa/config/specreg.h>

/*
 *  Assembly-language specific definitions (assembly macros, etc.).
 */

/*----------------------------------------------------------------------
 *  find_ms_setbit
 *
 *  This macro finds the most significant bit that is set in <as>
 *  and return its index + <base> in <ad>, or <base> - 1 if <as> is zero.
 *  The index counts starting at zero for the lsbit, so the return
 *  value ranges from <base>-1 (no bit set) to <base>+31 (msbit set).
 *
 *  Parameters:
 *	<ad>	destination address register (any register)
 *	<as>	source address register
 *	<at>	temporary address register (must be different than <as>)
 *	<base>	constant value added to result (usually 0 or 1)
 *  On entry:
 *	<ad> = undefined if different than <as>
 *	<as> = value whose most significant set bit is to be found
 *	<at> = undefined
 *	no other registers are used by this macro.
 *  On exit:
 *	<ad> = <base> + index of msbit set in original <as>,
 *	     = <base> - 1 if original <as> was zero.
 *	<as> clobbered (if not <ad>)
 *	<at> clobbered (if not <ad>)
 *  Example:
 *	find_ms_setbit a0, a4, a0, 0		-- return in a0 index of msbit set in a4
 */

	.macro	find_ms_setbit ad, as, at, base
#if XCHAL_HAVE_NSA
	movi	\at, 31+\base
	nsau	\as, \as	// get index of \as, numbered from msbit (32 if absent)
	sub	\ad, \at, \as	// get numbering from lsbit (0..31, -1 if absent)
#else /* XCHAL_HAVE_NSA */
	movi	\at, \base	// start with result of 0 (point to lsbit of 32)

	beqz	\as, 2f		// special case for zero argument: return -1
	bltui	\as, 0x10000, 1f	// is it one of the 16 lsbits? (if so, check lower 16 bits)
	addi	\at, \at, 16	// no, increment result to upper 16 bits (of 32)
	//srli	\as, \as, 16	// check upper half (shift right 16 bits)
	extui	\as, \as, 16, 16	// check upper half (shift right 16 bits)
1:	bltui	\as, 0x100, 1f	// is it one of the 8 lsbits? (if so, check lower 8 bits)
	addi	\at, \at, 8	// no, increment result to upper 8 bits (of 16)
	srli	\as, \as, 8	// shift right to check upper 8 bits
1:	bltui	\as, 0x10, 1f	// is it one of the 4 lsbits? (if so, check lower 4 bits)
	addi	\at, \at, 4	// no, increment result to upper 4 bits (of 8)
	srli	\as, \as, 4	// shift right 4 bits to check upper half
1:	bltui	\as, 0x4, 1f	// is it one of the 2 lsbits? (if so, check lower 2 bits)
	addi	\at, \at, 2	// no, increment result to upper 2 bits (of 4)
	srli	\as, \as, 2	// shift right 2 bits to check upper half
1:	bltui	\as, 0x2, 1f	// is it the lsbit?
	addi	\at, \at, 2	// no, increment result to upper bit (of 2)
2:	addi	\at, \at, -1	// (from just above: add 1;  from beqz: return -1)
	//srli	\as, \as, 1
1:				// done! \at contains index of msbit set (or -1 if none set)
	.if	0x\ad - 0x\at	// destination different than \at ? (works because regs are a0-a15)
	mov	\ad, \at	// then move result to \ad
	.endif
#endif /* XCHAL_HAVE_NSA */
	.endm	// find_ms_setbit

/*----------------------------------------------------------------------
 *  find_ls_setbit
 *
 *  This macro finds the least significant bit that is set in <as>,
 *  and return its index in <ad>.
 *  Usage is the same as for the find_ms_setbit macro.
 *  Example:
 *	find_ls_setbit a0, a4, a0, 0	-- return in a0 index of lsbit set in a4
 */

	.macro	find_ls_setbit ad, as, at, base
	neg	\at, \as	// keep only the least-significant bit that is set...
	and	\as, \at, \as	// ... in \as
	find_ms_setbit	\ad, \as, \at, \base
	.endm	// find_ls_setbit

/*----------------------------------------------------------------------
 *  find_ls_one
 *
 *  Same as find_ls_setbit with base zero.
 *  Source (as) and destination (ad) registers must be different.
 *  Provided for backward compatibility.
 */

	.macro	find_ls_one ad, as
	find_ls_setbit	\ad, \as, \ad, 0
	.endm	// find_ls_one

/*----------------------------------------------------------------------
 *  floop, floopnez, floopgtz, floopend
 *
 *  These macros are used for fast inner loops that
 *  work whether or not the Loops options is configured.
 *  If the Loops option is configured, they simply use
 *  the zero-overhead LOOP instructions; otherwise
 *  they use explicit decrement and branch instructions.
 *
 *  They are used in pairs, with floop, floopnez or floopgtz
 *  at the beginning of the loop, and floopend at the end.
 *
 *  Each pair of loop macro calls must be given the loop count
 *  address register and a unique label for that loop.
 *
 *  Example:
 *
 *	movi	 a3, 16     // loop 16 times
 *	floop    a3, myloop1
 *	:
 *	bnez     a7, end1	// exit loop if a7 != 0
 *	:
 *	floopend a3, myloop1
 *  end1:
 *
 *  Like the LOOP instructions, these macros cannot be
 *  nested, must include at least one instruction,
 *  cannot call functions inside the loop, etc.
 *  The loop can be exited by jumping to the instruction
 *  following floopend (or elsewhere outside the loop),
 *  or continued by jumping to a NOP instruction placed
 *  immediately before floopend.
 *
 *  Unlike LOOP instructions, the register passed to floop*
 *  cannot be used inside the loop, because it is used as
 *  the loop counter if the Loops option is not configured.
 *  And its value is undefined after exiting the loop.
 *  And because the loop counter register is active inside
 *  the loop, you can't easily use this construct to loop
 *  across a register file using ROTW as you might with LOOP
 *  instructions, unless you copy the loop register along.
 */

	/*  Named label version of the macros:  */

	.macro	floop		ar, endlabel
	floop_		\ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
	.endm

	.macro	floopnez	ar, endlabel
	floopnez_	\ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
	.endm

	.macro	floopgtz	ar, endlabel
	floopgtz_	\ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
	.endm

	.macro	floopend	ar, endlabel
	floopend_	\ar, .Lfloopstart_\endlabel, .Lfloopend_\endlabel
	.endm

	/*  Numbered local label version of the macros:  */
#if 0 /*UNTESTED*/
	.macro	floop89		ar
	floop_		\ar, 8, 9f
	.endm

	.macro	floopnez89	ar
	floopnez_	\ar, 8, 9f
	.endm

	.macro	floopgtz89	ar
	floopgtz_	\ar, 8, 9f
	.endm

	.macro	floopend89	ar
	floopend_	\ar, 8b, 9
	.endm
#endif /*0*/

	/*  Underlying version of the macros:  */

	.macro	floop_	ar, startlabel, endlabelref
	.ifdef	_infloop_
	.if	_infloop_
	.err	// Error: floop cannot be nested
	.endif
	.endif
	.set	_infloop_, 1
#if XCHAL_HAVE_LOOPS
	loop	\ar, \endlabelref
#else /* XCHAL_HAVE_LOOPS */
\startlabel:
	addi	\ar, \ar, -1
#endif /* XCHAL_HAVE_LOOPS */
	.endm	// floop_

	.macro	floopnez_	ar, startlabel, endlabelref
	.ifdef	_infloop_
	.if	_infloop_
	.err	// Error: floopnez cannot be nested
	.endif
	.endif
	.set	_infloop_, 1
#if XCHAL_HAVE_LOOPS
	loopnez	\ar, \endlabelref
#else /* XCHAL_HAVE_LOOPS */
	beqz	\ar, \endlabelref
\startlabel:
	addi	\ar, \ar, -1
#endif /* XCHAL_HAVE_LOOPS */
	.endm	// floopnez_

	.macro	floopgtz_	ar, startlabel, endlabelref
	.ifdef	_infloop_
	.if	_infloop_
	.err	// Error: floopgtz cannot be nested
	.endif
	.endif
	.set	_infloop_, 1
#if XCHAL_HAVE_LOOPS
	loopgtz	\ar, \endlabelref
#else /* XCHAL_HAVE_LOOPS */
	bltz	\ar, \endlabelref
	beqz	\ar, \endlabelref
\startlabel:
	addi	\ar, \ar, -1
#endif /* XCHAL_HAVE_LOOPS */
	.endm	// floopgtz_


	.macro	floopend_	ar, startlabelref, endlabel
	.ifndef	_infloop_
	.err	// Error: floopend without matching floopXXX
	.endif
	.ifeq	_infloop_
	.err	// Error: floopend without matching floopXXX
	.endif
	.set	_infloop_, 0
#if ! XCHAL_HAVE_LOOPS
	bnez	\ar, \startlabelref
#endif /* XCHAL_HAVE_LOOPS */
\endlabel:
	.endm	// floopend_

/*----------------------------------------------------------------------
 *  crsil  --  conditional RSIL (read/set interrupt level)
 *
 *  Executes the RSIL instruction if it exists, else just reads PS.
 *  The RSIL instruction does not exist in the new exception architecture
 *  if the interrupt option is not selected.
 */

	.macro	crsil	ar, newlevel
#if XCHAL_HAVE_OLD_EXC_ARCH || XCHAL_HAVE_INTERRUPTS
	rsil	\ar, \newlevel
#else
	rsr	\ar, PS
#endif
	.endm	// crsil

/*----------------------------------------------------------------------
 *  window_spill{4,8,12}
 *
 *  These macros spill callers' register windows to the stack.
 *  They work for both privileged and non-privileged tasks.
 *  Must be called from a windowed ABI context, eg. within
 *  a windowed ABI function (ie. valid stack frame, window
 *  exceptions enabled, not in exception mode, etc).
 *
 *  This macro requires a single invocation of the window_spill_common
 *  macro in the same assembly unit and section.
 *
 *  Note that using window_spill{4,8,12} macros is more efficient
 *  than calling a function implemented using window_spill_function,
 *  because the latter needs extra code to figure out the size of
 *  the call to the spilling function.
 *
 *  Example usage:
 *
 *		.text
 *		.align	4
 *		.global	some_function
 *		.type	some_function,@function
 *	some_function:
 *		entry	a1, 16
 *		:
 *		:
 *
 *		window_spill4	// spill windows of some_function's callers; preserves a0..a3 only;
 *				// to use window_spill{8,12} in this example function we'd have
 *				// to increase space allocated by the entry instruction, because
 *				// 16 bytes only allows call4; 32 or 48 bytes (+locals) are needed
 *				// for call8/window_spill8 or call12/window_spill12 respectively.
 *		:
 *
 *		retw
 *
 *		window_spill_common	// instantiates code used by window_spill4
 *
 *
 *  On entry:
 *	none (if window_spill4)
 *	stack frame has enough space allocated for call8 (if window_spill8)
 *	stack frame has enough space allocated for call12 (if window_spill12)
 *  On exit:
 *	 a4..a15 clobbered (if window_spill4)
 *	 a8..a15 clobbered (if window_spill8)
 *	a12..a15 clobbered (if window_spill12)
 *	no caller windows are in live registers
 */

	.macro	window_spill4
#if XCHAL_HAVE_WINDOWED
# if XCHAL_NUM_AREGS == 16
	movi	a15, 0			// for 16-register files, no need to call to reach the end
# elif XCHAL_NUM_AREGS == 32
	call4	.L__wdwspill_assist28	// call deep enough to clear out any live callers
# elif XCHAL_NUM_AREGS == 64
	call4	.L__wdwspill_assist60	// call deep enough to clear out any live callers
# endif
#endif
	.endm	// window_spill4

	.macro	window_spill8
#if XCHAL_HAVE_WINDOWED
# if XCHAL_NUM_AREGS == 16
	movi	a15, 0			// for 16-register files, no need to call to reach the end
# elif XCHAL_NUM_AREGS == 32
	call8	.L__wdwspill_assist24	// call deep enough to clear out any live callers
# elif XCHAL_NUM_AREGS == 64
	call8	.L__wdwspill_assist56	// call deep enough to clear out any live callers
# endif
#endif
	.endm	// window_spill8

	.macro	window_spill12
#if XCHAL_HAVE_WINDOWED
# if XCHAL_NUM_AREGS == 16
	movi	a15, 0			// for 16-register files, no need to call to reach the end
# elif XCHAL_NUM_AREGS == 32
	call12	.L__wdwspill_assist20	// call deep enough to clear out any live callers
# elif XCHAL_NUM_AREGS == 64
	call12	.L__wdwspill_assist52	// call deep enough to clear out any live callers
# endif
#endif
	.endm	// window_spill12

/*----------------------------------------------------------------------
 *  window_spill_function
 *
 *  This macro outputs a function that will spill its caller's callers'
 *  register windows to the stack.  Eg. it could be used to implement
 *  a version of xthal_window_spill() that works in non-privileged tasks.
 *  This works for both privileged and non-privileged tasks.
 *
 *  Typical usage:
 *
 *		.text
 *		.align	4
 *		.global	my_spill_function
 *		.type	my_spill_function,@function
 *	my_spill_function:
 *		window_spill_function
 *
 *  On entry to resulting function:
 *	none
 *  On exit from resulting function:
 *	none (no caller windows are in live registers)
 */

	.macro	window_spill_function
#if XCHAL_HAVE_WINDOWED
# if XCHAL_NUM_AREGS == 32
	entry	sp, 48
	bbci.l	a0, 31, 1f		// branch if called with call4
	bbsi.l	a0, 30, 2f		// branch if called with call12
	call8	.L__wdwspill_assist16	// called with call8, only need another 8
	retw
1:	call12	.L__wdwspill_assist16	// called with call4, only need another 12
	retw
2:	call4	.L__wdwspill_assist16	// called with call12, only need another 4
	retw
# elif XCHAL_NUM_AREGS == 64
	entry	sp, 48
	bbci.l	a0, 31, 1f		// branch if called with call4
	bbsi.l	a0, 30, 2f		// branch if called with call12
	call4	.L__wdwspill_assist52	// called with call8, only need a call4
	retw
1:	call8	.L__wdwspill_assist52	// called with call4, only need a call8
	retw
2:	call12	.L__wdwspill_assist40	// called with call12, can skip a call12
	retw
# elif XCHAL_NUM_AREGS == 16
	entry	sp, 16
	bbci.l	a0, 31, 1f	// branch if called with call4
	bbsi.l	a0, 30, 2f	// branch if called with call12
	movi	a7, 0		// called with call8
	retw
1:	movi	a11, 0		// called with call4
2:	retw			// if called with call12, everything already spilled

//	movi	a15, 0		// trick to spill all but the direct caller
//	j	1f
//	//  The entry instruction is magical in the assembler (gets auto-aligned)
//	//  so we have to jump to it to avoid falling through the padding.
//	//  We need entry/retw to know where to return.
//1:	entry	sp, 16
//	retw
# else
#  error "unrecognized address register file size"
# endif
#endif /* XCHAL_HAVE_WINDOWED */
	window_spill_common
	.endm	// window_spill_function

/*----------------------------------------------------------------------
 *  window_spill_common
 *
 *  Common code used by any number of invocations of the window_spill##
 *  and window_spill_function macros.
 *
 *  Must be instantiated exactly once within a given assembly unit,
 *  within call/j range of and same section as window_spill##
 *  macro invocations for that assembly unit.
 *  (Is automatically instantiated by the window_spill_function macro.)
 */

	.macro	window_spill_common
#if XCHAL_HAVE_WINDOWED && (XCHAL_NUM_AREGS == 32 || XCHAL_NUM_AREGS == 64)
	.ifndef	.L__wdwspill_defined
# if XCHAL_NUM_AREGS >= 64
.L__wdwspill_assist60:
	entry	sp, 32
	call8	.L__wdwspill_assist52
	retw
.L__wdwspill_assist56:
	entry	sp, 16
	call4	.L__wdwspill_assist52
	retw
.L__wdwspill_assist52:
	entry	sp, 48
	call12	.L__wdwspill_assist40
	retw
.L__wdwspill_assist40:
	entry	sp, 48
	call12	.L__wdwspill_assist28
	retw
# endif
.L__wdwspill_assist28:
	entry	sp, 48
	call12	.L__wdwspill_assist16
	retw
.L__wdwspill_assist24:
	entry	sp, 32
	call8	.L__wdwspill_assist16
	retw
.L__wdwspill_assist20:
	entry	sp, 16
	call4	.L__wdwspill_assist16
	retw
.L__wdwspill_assist16:
	entry	sp, 16
	movi	a15, 0
	retw
	.set	.L__wdwspill_defined, 1
	.endif
#endif /* XCHAL_HAVE_WINDOWED with 32 or 64 aregs */
	.endm	// window_spill_common

/*----------------------------------------------------------------------
 *  beqi32
 *
 *  macro implements version of beqi for arbitrary 32-bit immidiate value
 *
 *     beqi32 ax, ay, imm32, label
 *
 *  Compares value in register ax with imm32 value and jumps to label if
 *  equal. Clobberes register ay if needed
 *
 */
   .macro beqi32	ax, ay, imm, label
    .ifeq ((\imm-1) & ~7)	// 1..8 ?
		beqi	\ax, \imm, \label
    .else
      .ifeq (\imm+1)		// -1 ?
		beqi	\ax, \imm, \label
      .else
        .ifeq (\imm)		// 0 ?
		beqz	\ax, \label
        .else
		//  We could also handle immediates 10,12,16,32,64,128,256
		//  but it would be a long macro...
		movi	\ay, \imm
		beq	\ax, \ay, \label
        .endif
      .endif
    .endif
   .endm // beqi32

#endif /*XTENSA_COREASM_H*/