summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: dlg <dlg@openbsd.org> 2019-01-30 03:08:12 +0000
committer: dlg <dlg@openbsd.org> 2019-01-30 03:08:12 +0000
commit: 728e23dfc36a7a5f9a19f349b6c8d489de3d3859 (patch)
tree: cd910b6c2a97a90f6de6b76e84d2d74246997cc1
parent: use MPLS_SHIM2LABEL and MPLS_LABEL2SHIM (diff)
download: wireguard-openbsd-728e23dfc36a7a5f9a19f349b6c8d489de3d3859.tar.xz
wireguard-openbsd-728e23dfc36a7a5f9a19f349b6c8d489de3d3859.zip
implement -msave-args in clang/llvm, like Sun did for gcc
this is a bit different to gcc, as gcc likes to use movs to move stuff on and off the stack, and directly updates the stack pointers with add and sub instructions. llvm prefers to use push and pop instructions, is a lot more careful about keeping track of how much stuff is currently on the stack, and generally pops the frame pointer rather than doing maths on it. -msave-args adds a bunch of pushes as the first thing a function prologue does. to keep the stack aligned, if there's an odd number of arguments to the function it pushes the first one again to put the frame back on a 16 byte boundary. to undo the pushes, the stack pointer needs to be moved back past the saved arguments in function epilogues. clang emits a series of pops to fix up the registers on the way out, but popping saved arguments is a waste of time and harmful to actual data in the function. rather than add an offset to the stack pointer, -msave-args emits a leaveq operation to fix up the frame again. leaveq is effectively mov rbp,rsp; pop rbp, and is a single byte, meaning there's less potential for gadgets compared to a direct add to rsp, or an explicit mov rbp,rsp. the only thing missing compared to the gcc implementation is adding the SUN_amd64_parmdump dwarf flag to affected functions. if someone can tell me how to add that from the frame lowering code, let me know. once this is enabled in kernel builds again, ddb stack traces will show useful arguments.
-rw-r--r-- gnu/llvm/include/llvm/BinaryFormat/Dwarf.def 2
-rw-r--r-- gnu/llvm/lib/Target/X86/X86.td 3
-rw-r--r-- gnu/llvm/lib/Target/X86/X86FrameLowering.cpp 97
-rw-r--r-- gnu/llvm/lib/Target/X86/X86FrameLowering.h 2
-rw-r--r-- gnu/llvm/lib/Target/X86/X86MachineFunctionInfo.h 6
-rw-r--r-- gnu/llvm/lib/Target/X86/X86Subtarget.h 5
-rw-r--r-- gnu/llvm/tools/clang/include/clang/Driver/Options.td 2
-rw-r--r-- gnu/llvm/tools/clang/lib/Basic/Targets/X86.cpp 3
-rw-r--r-- gnu/llvm/tools/clang/lib/Basic/Targets/X86.h 1
9 files changed, 106 insertions, 15 deletions
diff --git a/gnu/llvm/include/llvm/BinaryFormat/Dwarf.def b/gnu/llvm/include/llvm/BinaryFormat/Dwarf.def
index 944c5dd1c15..41ed16d6f45 100644
--- a/gnu/llvm/include/llvm/BinaryFormat/Dwarf.def
+++ b/gnu/llvm/include/llvm/BinaryFormat/Dwarf.def
@@ -355,6 +355,8 @@ HANDLE_DW_AT(0x2133, GNU_addr_base, 0, GNU)
HANDLE_DW_AT(0x2134, GNU_pubnames, 0, GNU)
HANDLE_DW_AT(0x2135, GNU_pubtypes, 0, GNU)
HANDLE_DW_AT(0x2136, GNU_discriminator, 0, GNU)
+// Sun Extension
+HANDLE_DW_AT(0x2224, SUN_amd64_parmdump, 0, GNU)
// Borland extensions.
HANDLE_DW_AT(0x3b11, BORLAND_property_read, 0, BORLAND)
HANDLE_DW_AT(0x3b12, BORLAND_property_write, 0, BORLAND)
diff --git a/gnu/llvm/lib/Target/X86/X86.td b/gnu/llvm/lib/Target/X86/X86.td
index 63c2dc4da6c..3ce047a4920 100644
--- a/gnu/llvm/lib/Target/X86/X86.td
+++ b/gnu/llvm/lib/Target/X86/X86.td
@@ -284,6 +284,9 @@ def FeatureLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt",
"LZCNT/TZCNT have a false dependency on dest register">;
def FeaturePCONFIG : SubtargetFeature<"pconfig", "HasPCONFIG", "true",
"platform configuration instruction">;
+def FeatureSaveArgs
+ : SubtargetFeature<"save-args", "SaveArgs", "true",
+ "Save register arguments on the stack.">;
// On recent X86 (port bound) processors, its preferable to combine to a single shuffle
// using a variable mask over multiple fixed shuffles.
def FeatureFastVariableShuffle
diff --git a/gnu/llvm/lib/Target/X86/X86FrameLowering.cpp b/gnu/llvm/lib/Target/X86/X86FrameLowering.cpp
index ea076e576ef..1a8d3b03836 100644
--- a/gnu/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/gnu/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -48,6 +48,7 @@ X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
// standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
StackPtr = TRI->getStackRegister();
+ SaveArgs = Is64Bit ? STI.getSaveArgs() : 0;
}
bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
@@ -91,7 +92,8 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
MFI.hasStackMap() || MFI.hasPatchPoint() ||
- MFI.hasCopyImplyingStackAdjustment());
+ MFI.hasCopyImplyingStackAdjustment() ||
+ SaveArgs);
}
static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) {
@@ -872,6 +874,24 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
MI->getOperand(3).setIsDead();
}
+// Returns the fixed list of 64-bit GPRs for CallConv; asserts a 64-bit subtarget. FIXME: Get this from tablegen.
+static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
+ const X86Subtarget &Subtarget) {
+ assert(Subtarget.is64Bit());
+ // Win64 calling conventions get the four-register list RCX, RDX, R8, R9.
+ if (Subtarget.isCallingConvWin64(CallConv)) {
+ static const MCPhysReg GPR64ArgRegsWin64[] = {
+ X86::RCX, X86::RDX, X86::R8, X86::R9
+ };
+ return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
+ }
+ // Every other calling convention gets the six-register list RDI, RSI, RDX, RCX, R8, R9.
+ static const MCPhysReg GPR64ArgRegs64Bit[] = {
+ X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
+ };
+ return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
+}
+
/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjust the stack pointer. Adjust the stack pointer to allocate
/// space for local variables. Also emit labels used by the exception handler to
@@ -1154,6 +1174,43 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
nullptr, DwarfFramePtr));
}
+ if (SaveArgs && !Fn.arg_empty()) {
+ ArrayRef<MCPhysReg> GPRs =
+ get64BitArgumentGPRs(Fn.getCallingConv(), STI);
+ unsigned arg_size = Fn.arg_size();
+ unsigned RI = 0;
+ int64_t SaveSize = 0;
+
+ if (Fn.hasStructRetAttr()) {
+ GPRs = GPRs.drop_front(1);
+ arg_size--;
+ }
+
+ for (MCPhysReg Reg : GPRs) {
+ if (++RI > arg_size)
+ break;
+
+ SaveSize += SlotSize;
+
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
+ .addReg(Reg)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ // Realign the stack. PUSHes are the most space efficient.
+ while (SaveSize % getStackAlignment()) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
+ .addReg(GPRs.front())
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ SaveSize += SlotSize;
+ }
+
+ //dlg StackSize -= SaveSize;
+ //dlg MFI.setStackSize(StackSize);
+ X86FI->setSaveArgSize(SaveSize);
+ }
+
if (NeedsWinFPO) {
// .cv_fpo_setframe $FramePtr
HasWinCFI = true;
@@ -1619,20 +1676,6 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
uint64_t SEHStackAllocAmt = NumBytes;
- if (HasFP) {
- // Pop EBP.
- BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
- MachineFramePtr)
- .setMIFlag(MachineInstr::FrameDestroy);
- if (NeedsDwarfCFI) {
- unsigned DwarfStackPtr =
- TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
- BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfa(
- nullptr, DwarfStackPtr, -SlotSize));
- --MBBI;
- }
- }
-
MachineBasicBlock::iterator FirstCSPop = MBBI;
// Skip the callee-saved pop instructions.
while (MBBI != MBB.begin()) {
@@ -1702,6 +1745,28 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
--MBBI;
}
+ if (HasFP) {
+ MBBI = Terminator;
+
+ if (X86FI->getSaveArgSize()) {
+ // LEAVE is effectively mov rbp,rsp; pop rbp
+ BuildMI(MBB, MBBI, DL, TII.get(X86::LEAVE64), MachineFramePtr)
+ .setMIFlag(MachineInstr::FrameDestroy);
+ } else {
+ // Pop EBP.
+ BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
+ MachineFramePtr)
+ .setMIFlag(MachineInstr::FrameDestroy);
+ }
+ if (NeedsDwarfCFI) {
+ unsigned DwarfStackPtr =
+ TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
+ BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createDefCfa(
+ nullptr, DwarfStackPtr, -SlotSize));
+ --MBBI;
+ }
+ }
+
// Windows unwinder will not invoke function's exception handler if IP is
// either in prologue or in epilogue. This behavior causes a problem when a
// call immediately precedes an epilogue, because the return address points
@@ -1790,6 +1855,8 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
"FPDelta isn't aligned per the Win64 ABI!");
}
+ if (FI >= 0)
+ Offset -= X86FI->getSaveArgSize();
if (TRI->hasBasePointer(MF)) {
assert(HasFP && "VLAs and dynamic stack realign, but no FP?!");
diff --git a/gnu/llvm/lib/Target/X86/X86FrameLowering.h b/gnu/llvm/lib/Target/X86/X86FrameLowering.h
index 430848d4d1d..a301056e89b 100644
--- a/gnu/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/gnu/llvm/lib/Target/X86/X86FrameLowering.h
@@ -37,6 +37,8 @@ public:
const X86RegisterInfo *TRI;
const X86ReturnProtectorLowering RPL;
+ bool SaveArgs;
+
unsigned SlotSize;
/// Is64Bit implies that x86_64 instructions are available.
diff --git a/gnu/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/gnu/llvm/lib/Target/X86/X86MachineFunctionInfo.h
index e1183bd1479..393abe698db 100644
--- a/gnu/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/gnu/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -41,6 +41,9 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// stack frame in bytes.
unsigned CalleeSavedFrameSize = 0;
+ // SaveArgSize - Size in bytes of the register arguments (plus any alignment padding) pushed on the stack by the prologue
+ unsigned SaveArgSize = 0;
+
/// BytesToPopOnReturn - Number of bytes function pops on return (in addition
/// to the space used by the return address).
/// Used on windows platform for stdcall & fastcall name decoration
@@ -124,6 +127,9 @@ public:
unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
+ unsigned getSaveArgSize() const { return SaveArgSize; }
+ void setSaveArgSize(unsigned bytes) { SaveArgSize = bytes; }
+
unsigned getBytesToPopOnReturn() const { return BytesToPopOnReturn; }
void setBytesToPopOnReturn (unsigned bytes) { BytesToPopOnReturn = bytes;}
diff --git a/gnu/llvm/lib/Target/X86/X86Subtarget.h b/gnu/llvm/lib/Target/X86/X86Subtarget.h
index 34143924b44..cf5439e42cf 100644
--- a/gnu/llvm/lib/Target/X86/X86Subtarget.h
+++ b/gnu/llvm/lib/Target/X86/X86Subtarget.h
@@ -400,6 +400,9 @@ protected:
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment = 4;
+ /// Whether function prologues should save register arguments on the stack.
+ unsigned SaveArgs;
+
/// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
///
// FIXME: this is a known good value for Yonah. How about others?
@@ -478,6 +481,8 @@ public:
return &getInstrInfo()->getRegisterInfo();
}
+ unsigned getSaveArgs() const { return SaveArgs; }
+
/// Returns the minimum alignment known to hold of the
/// stack frame on entry to the function and which must be maintained by every
/// function for this subtarget.
diff --git a/gnu/llvm/tools/clang/include/clang/Driver/Options.td b/gnu/llvm/tools/clang/include/clang/Driver/Options.td
index e8afeb469c5..b9b054606fb 100644
--- a/gnu/llvm/tools/clang/include/clang/Driver/Options.td
+++ b/gnu/llvm/tools/clang/include/clang/Driver/Options.td
@@ -2814,6 +2814,8 @@ def mretpoline : Flag<["-"], "mretpoline">, Group<m_x86_Features_Group>;
def mno_retpoline : Flag<["-"], "mno-retpoline">, Group<m_x86_Features_Group>;
def mretpoline_external_thunk : Flag<["-"], "mretpoline-external-thunk">, Group<m_x86_Features_Group>;
def mno_retpoline_external_thunk : Flag<["-"], "mno-retpoline-external-thunk">, Group<m_x86_Features_Group>;
+def msave_args : Flag<["-"], "msave-args">, Group<m_x86_Features_Group>;
+def mno_save_args : Flag<["-"], "mno-save-args">, Group<m_x86_Features_Group>;
// These are legacy user-facing driver-level option spellings. They are always
// aliases for options that are spelled using the more common Unix / GNU flag
diff --git a/gnu/llvm/tools/clang/lib/Basic/Targets/X86.cpp b/gnu/llvm/tools/clang/lib/Basic/Targets/X86.cpp
index e295cff9d5d..ffa9e0d2a87 100644
--- a/gnu/llvm/tools/clang/lib/Basic/Targets/X86.cpp
+++ b/gnu/llvm/tools/clang/lib/Basic/Targets/X86.cpp
@@ -814,6 +814,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasPTWRITE = true;
} else if (Feature == "+invpcid") {
HasINVPCID = true;
+ } else if (Feature == "+save-args") {
+ HasSaveArgs = true;
}
X86SSEEnum Level = llvm::StringSwitch<X86SSEEnum>(Feature)
@@ -1386,6 +1388,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("movdiri", HasMOVDIRI)
.Case("movdir64b", HasMOVDIR64B)
.Case("mpx", HasMPX)
+ .Case("save-args", HasSaveArgs)
.Case("mwaitx", HasMWAITX)
.Case("pclmul", HasPCLMUL)
.Case("pconfig", HasPCONFIG)
diff --git a/gnu/llvm/tools/clang/lib/Basic/Targets/X86.h b/gnu/llvm/tools/clang/lib/Basic/Targets/X86.h
index 1d23b0ef693..c476326102d 100644
--- a/gnu/llvm/tools/clang/lib/Basic/Targets/X86.h
+++ b/gnu/llvm/tools/clang/lib/Basic/Targets/X86.h
@@ -107,6 +107,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasMOVDIR64B = false;
bool HasPTWRITE = false;
bool HasINVPCID = false;
+ bool HasSaveArgs = false;
protected:
/// Enumeration of all of the X86 CPUs supported by Clang.