Diffstat (limited to 'gnu/llvm/lib/Target/X86/X86VZeroUpper.cpp')
-rw-r--r--  gnu/llvm/lib/Target/X86/X86VZeroUpper.cpp | 88
1 file changed, 48 insertions(+), 40 deletions(-)
diff --git a/gnu/llvm/lib/Target/X86/X86VZeroUpper.cpp b/gnu/llvm/lib/Target/X86/X86VZeroUpper.cpp
index 6925b272b4a..9320e1e2226 100644
--- a/gnu/llvm/lib/Target/X86/X86VZeroUpper.cpp
+++ b/gnu/llvm/lib/Target/X86/X86VZeroUpper.cpp
@@ -38,6 +38,10 @@ namespace {
VZeroUpperInserter() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::AllVRegsAllocated);
+ }
const char *getPassName() const override {return "X86 vzeroupper inserter";}
private:
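
The getRequiredProperties() override added above is the standard way for a late machine pass to tell the pass manager it must run after register allocation: AllVRegsAllocated asserts that no virtual registers remain. A minimal sketch of the same idiom, with a hypothetical pass name (only the MachineFunctionProperties lines come from this patch; the rest is illustrative boilerplate):

    #include "llvm/CodeGen/MachineFunctionPass.h"

    using namespace llvm;

    namespace {
    // Hypothetical late pass declaring that register allocation has run.
    struct LatePassSketch : public MachineFunctionPass {
      static char ID;
      LatePassSketch() : MachineFunctionPass(ID) {}

      // Verified by the pass manager before the pass runs, so the body may
      // assume every register operand is a physical register.
      MachineFunctionProperties getRequiredProperties() const override {
        return MachineFunctionProperties().set(
            MachineFunctionProperties::Property::AllVRegsAllocated);
      }

      bool runOnMachineFunction(MachineFunction &MF) override { return false; }
    };

    char LatePassSketch::ID = 0;
    } // end anonymous namespace
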
@@ -80,6 +84,7 @@ namespace {
BlockStateMap BlockStates;
DirtySuccessorsWorkList DirtySuccessors;
bool EverMadeChange;
+ bool IsX86INTR;
const TargetInstrInfo *TII;
static char ID;
@@ -122,10 +127,9 @@ static bool clobbersAllYmmRegs(const MachineOperand &MO) {
return true;
}
-static bool hasYmmReg(MachineInstr *MI) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (MI->isCall() && MO.isRegMask() && !clobbersAllYmmRegs(MO))
+static bool hasYmmReg(MachineInstr &MI) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MI.isCall() && MO.isRegMask() && !clobbersAllYmmRegs(MO))
return true;
if (!MO.isReg())
continue;
@@ -137,12 +141,10 @@ static bool hasYmmReg(MachineInstr *MI) {
return false;
}
-/// clobbersAnyYmmReg() - Check if any YMM register will be clobbered by this
-/// instruction.
-static bool callClobbersAnyYmmReg(MachineInstr *MI) {
- assert(MI->isCall() && "Can only be called on call instructions.");
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+/// Check if any YMM register will be clobbered by this instruction.
+static bool callClobbersAnyYmmReg(MachineInstr &MI) {
+ assert(MI.isCall() && "Can only be called on call instructions.");
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isRegMask())
continue;
for (unsigned reg = X86::YMM0; reg <= X86::YMM15; ++reg) {
@@ -153,16 +155,16 @@ static bool callClobbersAnyYmmReg(MachineInstr *MI) {
return false;
}
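
Both loops above were converted from index-based iteration to range-for over MI.operands(); the YMM scan then asks the call's register-mask operand which registers it clobbers. A standalone sketch of that check, assuming only the real MachineOperand::isRegMask()/clobbersPhysReg() API (the helper name and range parameters are hypothetical):

    #include "llvm/CodeGen/MachineOperand.h"
    #include <cassert>

    using namespace llvm;

    // Hypothetical helper generalizing the YMM0..YMM15 scan: does this
    // regmask operand clobber any physical register in [First, Last]?
    static bool clobbersAnyInRange(const MachineOperand &MO, unsigned First,
                                   unsigned Last) {
      assert(MO.isRegMask() && "expects a regmask operand");
      for (unsigned Reg = First; Reg <= Last; ++Reg)
        if (MO.clobbersPhysReg(Reg)) // a clear bit in the mask = clobbered
          return true;
      return false;
    }
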
-// Insert a vzeroupper instruction before I.
+/// Insert a vzeroupper instruction before I.
void VZeroUpperInserter::insertVZeroUpper(MachineBasicBlock::iterator I,
- MachineBasicBlock &MBB) {
+ MachineBasicBlock &MBB) {
DebugLoc dl = I->getDebugLoc();
BuildMI(MBB, I, dl, TII->get(X86::VZEROUPPER));
++NumVZU;
EverMadeChange = true;
}
-// Add MBB to the DirtySuccessors list if it hasn't already been added.
+/// Add MBB to the DirtySuccessors list if it hasn't already been added.
void VZeroUpperInserter::addDirtySuccessor(MachineBasicBlock &MBB) {
if (!BlockStates[MBB.getNumber()].AddedToDirtySuccessors) {
DirtySuccessors.push_back(&MBB);
@@ -170,21 +172,29 @@ void VZeroUpperInserter::addDirtySuccessor(MachineBasicBlock &MBB) {
}
}
-/// processBasicBlock - Loop over all of the instructions in the basic block,
-/// inserting vzeroupper instructions before function calls.
+/// Loop over all of the instructions in the basic block, inserting vzeroupper
+/// instructions before function calls.
void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
- // Start by assuming that the block PASS_THROUGH, which implies no unguarded
+ // Start by assuming that the block is PASS_THROUGH which implies no unguarded
// calls.
BlockExitState CurState = PASS_THROUGH;
BlockStates[MBB.getNumber()].FirstUnguardedCall = MBB.end();
- for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
- MachineInstr *MI = I;
- bool isControlFlow = MI->isCall() || MI->isReturn();
+ for (MachineInstr &MI : MBB) {
+ // No need for vzeroupper before iret in interrupt handler function,
+ // epilogue will restore YMM registers if needed.
+ bool IsReturnFromX86INTR = IsX86INTR && MI.isReturn();
+ bool IsControlFlow = MI.isCall() || MI.isReturn();
+
+ // An existing VZERO* instruction resets the state.
+ if (MI.getOpcode() == X86::VZEROALL || MI.getOpcode() == X86::VZEROUPPER) {
+ CurState = EXITS_CLEAN;
+ continue;
+ }
// Shortcut: don't need to check regular instructions in dirty state.
- if (!isControlFlow && CurState == EXITS_DIRTY)
+ if ((!IsControlFlow || IsReturnFromX86INTR) && CurState == EXITS_DIRTY)
continue;
if (hasYmmReg(MI)) {
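
The per-instruction logic above is a small state machine over the block's exit state. The state names come from the BlockExitState enum declared earlier in this file (outside the context shown in this patch); roughly:

    // Sketch of the enum as declared earlier in X86VZeroUpper.cpp; the
    // comments paraphrase its meaning, not the file's exact wording.
    enum BlockExitState {
      PASS_THROUGH, // no call or YMM-dirtying instruction seen yet
      EXITS_CLEAN,  // upper YMM halves known zeroed at block exit
      EXITS_DIRTY   // upper YMM halves may be dirty at block exit
    };
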
@@ -196,7 +206,7 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
// Check for control-flow out of the current function (which might
// indirectly execute SSE instructions).
- if (!isControlFlow)
+ if (!IsControlFlow || IsReturnFromX86INTR)
continue;
// If the call won't clobber any YMM register, skip it as well. It usually
@@ -204,22 +214,21 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
// standard calling convention is not used (RegMask is not used to mark
// register clobbered and register usage (def/imp-def/use) is well-defined
// and explicitly specified.
- if (MI->isCall() && !callClobbersAnyYmmReg(MI))
+ if (MI.isCall() && !callClobbersAnyYmmReg(MI))
continue;
- // The VZEROUPPER instruction resets the upper 128 bits of all Intel AVX
- // registers. This instruction has zero latency. In addition, the processor
- // changes back to Clean state, after which execution of Intel SSE
- // instructions or Intel AVX instructions has no transition penalty. Add
- // the VZEROUPPER instruction before any function call/return that might
- // execute SSE code.
+ // The VZEROUPPER instruction resets the upper 128 bits of all AVX
+ // registers. In addition, the processor changes back to Clean state, after
+ // which execution of SSE instructions or AVX instructions has no transition
+ // penalty. Add the VZEROUPPER instruction before any function call/return
+ // that might execute SSE code.
// FIXME: In some cases, we may want to move the VZEROUPPER into a
// predecessor block.
if (CurState == EXITS_DIRTY) {
// After the inserted VZEROUPPER the state becomes clean again, but
// other YMM may appear before other subsequent calls or even before
// the end of the BB.
- insertVZeroUpper(I, MBB);
+ insertVZeroUpper(MI, MBB);
CurState = EXITS_CLEAN;
} else if (CurState == PASS_THROUGH) {
// If this block is currently in pass-through state and we encounter a
@@ -227,7 +236,7 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
// block has successors that exit dirty. Record the location of the call,
// and set the state to EXITS_CLEAN, but do not insert the vzeroupper yet.
// It will be inserted later if necessary.
- BlockStates[MBB.getNumber()].FirstUnguardedCall = I;
+ BlockStates[MBB.getNumber()].FirstUnguardedCall = MI;
CurState = EXITS_CLEAN;
}
}
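
For context on the comment above: the penalty arises when legacy SSE code executes while the upper YMM halves are dirty. A user-level illustration with AVX intrinsics (the function names are hypothetical; _mm256_zeroupper() is the intrinsic form of the vzeroupper this pass inserts automatically):

    #include <immintrin.h>

    void sse_library_routine(float *dst); // hypothetical; may use legacy SSE

    void scale_then_call(float *dst, const float *src) {
      __m256 v = _mm256_loadu_ps(src);            // dirties upper YMM halves
      _mm256_storeu_ps(dst, _mm256_add_ps(v, v));
      _mm256_zeroupper();       // manual form of what this pass automates
      sse_library_routine(dst); // callee's SSE code sees a clean state
    }
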
@@ -244,15 +253,16 @@ void VZeroUpperInserter::processBasicBlock(MachineBasicBlock &MBB) {
BlockStates[MBB.getNumber()].ExitState = CurState;
}
-/// runOnMachineFunction - Loop over all of the basic blocks, inserting
-/// vzeroupper instructions before function calls.
+/// Loop over all of the basic blocks, inserting vzeroupper instructions before
+/// function calls.
bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
- if (!ST.hasAVX() || ST.hasAVX512())
+ if (!ST.hasAVX() || ST.hasAVX512() || ST.hasFastPartialYMMWrite())
return false;
TII = ST.getInstrInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
EverMadeChange = false;
+ IsX86INTR = MF.getFunction()->getCallingConv() == CallingConv::X86_INTR;
bool FnHasLiveInYmm = checkFnHasLiveInYmm(MRI);
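
The new CallingConv::X86_INTR check corresponds to functions compiled with the x86 interrupt attribute, whose prologue/epilogue already save and restore every clobbered register, YMM included. A sketch of source that produces this convention (clang/GCC attribute; the frame struct layout is defined by the ABI and left opaque here):

    struct interrupt_frame; // layout supplied by the ABI; opaque here

    __attribute__((interrupt))
    void isr(struct interrupt_frame *frame) {
      // The compiler-generated epilogue restores YMM state before iret,
      // which is why the pass skips vzeroupper before such returns.
    }
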
@@ -284,12 +294,12 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
for (MachineBasicBlock &MBB : MF)
processBasicBlock(MBB);
- // If any YMM regs are live in to this function, add the entry block to the
+ // If any YMM regs are live-in to this function, add the entry block to the
// DirtySuccessors list
if (FnHasLiveInYmm)
addDirtySuccessor(MF.front());
- // Re-visit all blocks that are successors of EXITS_DIRTY bsocks. Add
+ // Re-visit all blocks that are successors of EXITS_DIRTY blocks. Add
// vzeroupper instructions to unguarded calls, and propagate EXITS_DIRTY
// through PASS_THROUGH blocks.
while (!DirtySuccessors.empty()) {
@@ -302,16 +312,14 @@ bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
if (BBState.FirstUnguardedCall != MBB.end())
insertVZeroUpper(BBState.FirstUnguardedCall, MBB);
- // If this successor was a pass-through block then it is now dirty, and its
+ // If this successor was a pass-through block, then it is now dirty. Its
// successors need to be added to the worklist (if they haven't been
// already).
if (BBState.ExitState == PASS_THROUGH) {
DEBUG(dbgs() << "MBB #" << MBB.getNumber()
<< " was Pass-through, is now Dirty-out.\n");
- for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(),
- SE = MBB.succ_end();
- SI != SE; ++SI)
- addDirtySuccessor(**SI);
+ for (MachineBasicBlock *Succ : MBB.successors())
+ addDirtySuccessor(*Succ);
}
}
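
The loop above is a standard worklist fixed point: dirty exit states flow forward through PASS_THROUGH blocks, blocks that exit clean stop the propagation, and the AddedToDirtySuccessors flag set in addDirtySuccessor() keeps each block from being enqueued twice. A self-contained sketch of the same propagation over a toy CFG (all names hypothetical):

    #include <vector>

    struct Block {
      std::vector<Block *> Succs;
      bool PassThrough = true; // analogue of ExitState == PASS_THROUGH
      bool Enqueued = false;   // analogue of AddedToDirtySuccessors
    };

    // Push dirtiness from Entry forward; a block that exits clean (e.g. it
    // contains a vzeroupper) does not spread dirtiness to its successors.
    static void propagateDirty(Block &Entry) {
      std::vector<Block *> Work{&Entry};
      Entry.Enqueued = true;
      while (!Work.empty()) {
        Block *B = Work.back();
        Work.pop_back();
        if (!B->PassThrough)
          continue;
        for (Block *S : B->Succs)
          if (!S->Enqueued) {
            S->Enqueued = true;
            Work.push_back(S);
          }
      }
    }
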