Diffstat (limited to 'gnu/llvm/lib/Target/WebAssembly')
 51 files changed, 4346 insertions(+), 1321 deletions(-)
diff --git a/gnu/llvm/lib/Target/WebAssembly/CMakeLists.txt b/gnu/llvm/lib/Target/WebAssembly/CMakeLists.txt index e5c68e59847..b2865f1a0f9 100644 --- a/gnu/llvm/lib/Target/WebAssembly/CMakeLists.txt +++ b/gnu/llvm/lib/Target/WebAssembly/CMakeLists.txt @@ -10,11 +10,11 @@ tablegen(LLVM WebAssemblyGenSubtargetInfo.inc -gen-subtarget) add_public_tablegen_target(WebAssemblyCommonTableGen) add_llvm_target(WebAssemblyCodeGen - Relooper.cpp WebAssemblyArgumentMove.cpp WebAssemblyAsmPrinter.cpp WebAssemblyCFGStackify.cpp WebAssemblyFastISel.cpp + WebAssemblyFixIrreducibleControlFlow.cpp WebAssemblyFrameLowering.cpp WebAssemblyISelDAGToDAG.cpp WebAssemblyISelLowering.cpp @@ -22,14 +22,17 @@ add_llvm_target(WebAssemblyCodeGen WebAssemblyLowerBrUnless.cpp WebAssemblyMachineFunctionInfo.cpp WebAssemblyMCInstLower.cpp + WebAssemblyOptimizeLiveIntervals.cpp WebAssemblyOptimizeReturned.cpp WebAssemblyPeephole.cpp - WebAssemblyPEI.cpp + WebAssemblyPrepareForLiveIntervals.cpp WebAssemblyRegisterInfo.cpp WebAssemblyRegColoring.cpp WebAssemblyRegNumbering.cpp WebAssemblyRegStackify.cpp + WebAssemblyReplacePhysRegs.cpp WebAssemblySelectionDAGInfo.cpp + WebAssemblySetP2AlignOperands.cpp WebAssemblyStoreResults.cpp WebAssemblySubtarget.cpp WebAssemblyTargetMachine.cpp diff --git a/gnu/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/gnu/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp index 0143b10c0ab..c0355aef0b3 100644 --- a/gnu/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp +++ b/gnu/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp @@ -18,7 +18,7 @@ #include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -93,6 +93,7 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction( const MCOperandInfo &Info = Desc.OpInfo[i]; switch (Info.OperandType) { case MCOI::OPERAND_IMMEDIATE: + case WebAssembly::OPERAND_P2ALIGN: case WebAssembly::OPERAND_BASIC_BLOCK: { if (Pos + sizeof(uint64_t) > Bytes.size()) return MCDisassembler::Fail; @@ -109,7 +110,8 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction( MI.addOperand(MCOperand::createReg(Reg)); break; } - case WebAssembly::OPERAND_FPIMM: { + case WebAssembly::OPERAND_FP32IMM: + case WebAssembly::OPERAND_FP64IMM: { // TODO: MC converts all floating point immediate operands to double. // This is fine for numeric values, but may cause NaNs to change bits. if (Pos + sizeof(uint64_t) > Bytes.size()) diff --git a/gnu/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp b/gnu/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp index 9a95150cb55..267d716dd1d 100644 --- a/gnu/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp +++ b/gnu/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp @@ -110,14 +110,22 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS, } static std::string toString(const APFloat &FP) { + // Print NaNs with custom payloads specially. + if (FP.isNaN() && + !FP.bitwiseIsEqual(APFloat::getQNaN(FP.getSemantics())) && + !FP.bitwiseIsEqual(APFloat::getQNaN(FP.getSemantics(), /*Negative=*/true))) { + APInt AI = FP.bitcastToAPInt(); + return + std::string(AI.isNegative() ? 
"-" : "") + "nan:0x" + + utohexstr(AI.getZExtValue() & + (AI.getBitWidth() == 32 ? INT64_C(0x007fffff) : + INT64_C(0x000fffffffffffff)), + /*LowerCase=*/true); + } + + // Use C99's hexadecimal floating-point representation. static const size_t BufBytes = 128; char buf[BufBytes]; - if (FP.isNaN()) - assert((FP.bitwiseIsEqual(APFloat::getQNaN(FP.getSemantics())) || - FP.bitwiseIsEqual( - APFloat::getQNaN(FP.getSemantics(), /*Negative=*/true))) && - "convertToHexString handles neither SNaN nor NaN payloads"); - // Use C99's hexadecimal floating-point representation. auto Written = FP.convertToHexString( buf, /*hexDigits=*/0, /*upperCase=*/false, APFloat::rmNearestTiesToEven); (void)Written; @@ -137,11 +145,11 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, if (int(WAReg) >= 0) printRegName(O, WAReg); else if (OpNo >= MII.get(MI->getOpcode()).getNumDefs()) - O << "$pop" << (WAReg & INT32_MAX); + O << "$pop" << WebAssemblyFunctionInfo::getWARegStackId(WAReg); else if (WAReg != WebAssemblyFunctionInfo::UnusedReg) - O << "$push" << (WAReg & INT32_MAX); + O << "$push" << WebAssemblyFunctionInfo::getWARegStackId(WAReg); else - O << "$discard"; + O << "$drop"; // Add a '=' suffix if this is a def. if (OpNo < MII.get(MI->getOpcode()).getNumDefs()) O << '='; @@ -157,10 +165,20 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, // control flow stack, and it may be nice to pretty-print. O << Op.getImm(); } else if (Op.isFPImm()) { - assert((OpNo < MII.get(MI->getOpcode()).getNumOperands() || - MII.get(MI->getOpcode()).TSFlags == 0) && + const MCInstrDesc &Desc = MII.get(MI->getOpcode()); + assert(OpNo < Desc.getNumOperands() && + "Unexpected floating-point immediate as a non-fixed operand"); + assert(Desc.TSFlags == 0 && "WebAssembly variable_ops floating point ops don't use TSFlags"); - O << toString(APFloat(Op.getFPImm())); + const MCOperandInfo &Info = Desc.OpInfo[OpNo]; + if (Info.OperandType == WebAssembly::OPERAND_FP32IMM) { + // TODO: MC converts all floating point immediate operands to double. + // This is fine for numeric values, but may cause NaNs to change bits. 
+ O << toString(APFloat(float(Op.getFPImm()))); + } else { + assert(Info.OperandType == WebAssembly::OPERAND_FP64IMM); + O << toString(APFloat(Op.getFPImm())); + } } else { assert((OpNo < MII.get(MI->getOpcode()).getNumOperands() || (MII.get(MI->getOpcode()).TSFlags & @@ -172,6 +190,16 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } } +void +WebAssemblyInstPrinter::printWebAssemblyP2AlignOperand(const MCInst *MI, + unsigned OpNo, + raw_ostream &O) { + int64_t Imm = MI->getOperand(OpNo).getImm(); + if (Imm == WebAssembly::GetDefaultP2Align(MI->getOpcode())) + return; + O << ":p2align=" << Imm; +} + const char *llvm::WebAssembly::TypeToString(MVT Ty) { switch (Ty.SimpleTy) { case MVT::i32: diff --git a/gnu/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/gnu/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h index cd6c59a41c3..07b0f914e44 100644 --- a/gnu/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h +++ b/gnu/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h @@ -15,8 +15,9 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_INSTPRINTER_WEBASSEMBLYINSTPRINTER_H #define LLVM_LIB_TARGET_WEBASSEMBLY_INSTPRINTER_WEBASSEMBLYINSTPRINTER_H -#include "llvm/MC/MCInstPrinter.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineValueType.h" +#include "llvm/MC/MCInstPrinter.h" namespace llvm { @@ -36,6 +37,8 @@ public: // Used by tblegen code. void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); + void printWebAssemblyP2AlignOperand(const MCInst *MI, unsigned OpNo, + raw_ostream &O); // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &O); diff --git a/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp index bba06f65e16..df6fb8968d5 100644 --- a/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp +++ b/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp @@ -55,7 +55,8 @@ public: bool mayNeedRelaxation(const MCInst &Inst) const override { return false; } - void relaxInstruction(const MCInst &Inst, MCInst &Res) const override {} + void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI, + MCInst &Res) const override {} bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override; }; @@ -73,8 +74,10 @@ void WebAssemblyAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value, bool IsPCRel) const { const MCFixupKindInfo &Info = getFixupKindInfo(Fixup.getKind()); - unsigned NumBytes = RoundUpToAlignment(Info.TargetSize, 8); - if (!Value) + assert(Info.Flags == 0 && "WebAssembly does not use MCFixupKindInfo flags"); + + unsigned NumBytes = (Info.TargetSize + 7) / 8; + if (Value == 0) return; // Doesn't change encoding. // Shift the value into position. 
diff --git a/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp b/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp index 2bb58b33934..2146f67959b 100644 --- a/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp +++ b/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp @@ -25,8 +25,8 @@ public: WebAssemblyELFObjectWriter(bool Is64Bit, uint8_t OSABI); protected: - unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, - bool IsPCRel) const override; + unsigned getRelocType(MCContext &Ctx, const MCValue &Target, + const MCFixup &Fixup, bool IsPCRel) const override; }; } // end anonymous namespace @@ -35,7 +35,8 @@ WebAssemblyELFObjectWriter::WebAssemblyELFObjectWriter(bool Is64Bit, : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_WEBASSEMBLY, /*HasRelocationAddend=*/false) {} -unsigned WebAssemblyELFObjectWriter::GetRelocType(const MCValue &Target, +unsigned WebAssemblyELFObjectWriter::getRelocType(MCContext &Ctx, + const MCValue &Target, const MCFixup &Fixup, bool IsPCRel) const { // WebAssembly functions are not allocated in the address space. To resolve a diff --git a/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp b/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp index 02c717a9210..d8c39216c53 100644 --- a/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp +++ b/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp @@ -15,7 +15,6 @@ #include "WebAssemblyMCAsmInfo.h" #include "llvm/ADT/Triple.h" -#include "llvm/Support/CommandLine.h" using namespace llvm; #define DEBUG_TYPE "wasm-mc-asm-info" @@ -48,4 +47,7 @@ WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) { ExceptionsType = ExceptionHandling::None; // TODO: UseIntegratedAssembler? + + // WebAssembly's stack is never executable. + UsesNonexecutableStackSection = false; } diff --git a/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp index f409bd77442..23f8b3d0e82 100644 --- a/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp +++ b/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp @@ -22,6 +22,7 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/EndianStream.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -33,7 +34,6 @@ STATISTIC(MCNumFixups, "Number of MC fixups created."); namespace { class WebAssemblyMCCodeEmitter final : public MCCodeEmitter { const MCInstrInfo &MCII; - const MCContext &Ctx; // Implementation generated by tablegen. 
uint64_t getBinaryCodeForInstr(const MCInst &MI, @@ -45,14 +45,12 @@ class WebAssemblyMCCodeEmitter final : public MCCodeEmitter { const MCSubtargetInfo &STI) const override; public: - WebAssemblyMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) - : MCII(mcii), Ctx(ctx) {} + WebAssemblyMCCodeEmitter(const MCInstrInfo &mcii) : MCII(mcii) {} }; } // end anonymous namespace -MCCodeEmitter *llvm::createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII, - MCContext &Ctx) { - return new WebAssemblyMCCodeEmitter(MCII, Ctx); +MCCodeEmitter *llvm::createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII) { + return new WebAssemblyMCCodeEmitter(MCII); } void WebAssemblyMCCodeEmitter::encodeInstruction( @@ -78,7 +76,8 @@ void WebAssemblyMCCodeEmitter::encodeInstruction( support::endian::Writer<support::little>(OS).write<uint64_t>(0); Fixups.push_back(MCFixup::create( (1 + MCII.get(MI.getOpcode()).isVariadic() + i) * sizeof(uint64_t), - MO.getExpr(), STI.getTargetTriple().isArch64Bit() ? FK_Data_8 : FK_Data_4, + MO.getExpr(), + STI.getTargetTriple().isArch64Bit() ? FK_Data_8 : FK_Data_4, MI.getLoc())); ++MCNumFixups; } else { diff --git a/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp index 37000f1cd57..ac11a64086f 100644 --- a/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp +++ b/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp @@ -16,7 +16,6 @@ #include "InstPrinter/WebAssemblyInstPrinter.h" #include "WebAssemblyMCAsmInfo.h" #include "WebAssemblyTargetStreamer.h" -#include "llvm/MC/MCCodeGenInfo.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" @@ -40,6 +39,15 @@ static MCAsmInfo *createMCAsmInfo(const MCRegisterInfo & /*MRI*/, return new WebAssemblyMCAsmInfo(TT); } +static void adjustCodeGenOpts(const Triple & /*TT*/, Reloc::Model /*RM*/, + CodeModel::Model &CM) { + CodeModel::Model M = (CM == CodeModel::Default || CM == CodeModel::JITDefault) + ? CodeModel::Large + : CM; + if (M != CodeModel::Large) + report_fatal_error("Non-large code models are not supported yet"); +} + static MCInstrInfo *createMCInstrInfo() { MCInstrInfo *X = new MCInstrInfo(); InitWebAssemblyMCInstrInfo(X); @@ -57,14 +65,14 @@ static MCInstPrinter *createMCInstPrinter(const Triple & /*T*/, const MCAsmInfo &MAI, const MCInstrInfo &MII, const MCRegisterInfo &MRI) { - assert(SyntaxVariant == 0); + assert(SyntaxVariant == 0 && "WebAssembly only has one syntax variant"); return new WebAssemblyInstPrinter(MAI, MII, MRI); } static MCCodeEmitter *createCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo & /*MRI*/, - MCContext &Ctx) { - return createWebAssemblyMCCodeEmitter(MCII, Ctx); + MCContext & /*Ctx*/) { + return createWebAssemblyMCCodeEmitter(MCII); } static MCAsmBackend *createAsmBackend(const Target & /*T*/, @@ -99,6 +107,9 @@ extern "C" void LLVMInitializeWebAssemblyTargetMC() { // Register the MC instruction info. TargetRegistry::RegisterMCInstrInfo(*T, createMCInstrInfo); + // Register the MC codegen info. + TargetRegistry::registerMCAdjustCodeGenOpts(*T, adjustCodeGenOpts); + // Register the MC register info. 
TargetRegistry::RegisterMCRegInfo(*T, createMCRegisterInfo); diff --git a/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index 9bac4f82822..001bd7f1fc4 100644 --- a/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -33,8 +33,7 @@ class raw_pwrite_stream; extern Target TheWebAssemblyTarget32; extern Target TheWebAssemblyTarget64; -MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII, - MCContext &Ctx); +MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII); MCAsmBackend *createWebAssemblyAsmBackend(const Triple &TT); @@ -45,8 +44,12 @@ namespace WebAssembly { enum OperandType { /// Basic block label in a branch construct. OPERAND_BASIC_BLOCK = MCOI::OPERAND_FIRST_TARGET, - /// Floating-point immediate. - OPERAND_FPIMM + /// 32-bit floating-point immediates. + OPERAND_FP32IMM, + /// 64-bit floating-point immediates. + OPERAND_FP64IMM, + /// p2align immediate for load and store address alignment. + OPERAND_P2ALIGN }; /// WebAssembly-specific directive identifiers. @@ -87,4 +90,49 @@ enum { #define GET_SUBTARGETINFO_ENUM #include "WebAssemblyGenSubtargetInfo.inc" +namespace llvm { +namespace WebAssembly { + +/// Return the default p2align value for a load or store with the given opcode. +inline unsigned GetDefaultP2Align(unsigned Opcode) { + switch (Opcode) { + case WebAssembly::LOAD8_S_I32: + case WebAssembly::LOAD8_U_I32: + case WebAssembly::LOAD8_S_I64: + case WebAssembly::LOAD8_U_I64: + case WebAssembly::STORE8_I32: + case WebAssembly::STORE8_I64: + return 0; + case WebAssembly::LOAD16_S_I32: + case WebAssembly::LOAD16_U_I32: + case WebAssembly::LOAD16_S_I64: + case WebAssembly::LOAD16_U_I64: + case WebAssembly::STORE16_I32: + case WebAssembly::STORE16_I64: + return 1; + case WebAssembly::LOAD_I32: + case WebAssembly::LOAD_F32: + case WebAssembly::STORE_I32: + case WebAssembly::STORE_F32: + case WebAssembly::LOAD32_S_I64: + case WebAssembly::LOAD32_U_I64: + case WebAssembly::STORE32_I64: + return 2; + case WebAssembly::LOAD_I64: + case WebAssembly::LOAD_F64: + case WebAssembly::STORE_I64: + case WebAssembly::STORE_F64: + return 3; + default: llvm_unreachable("Only loads and stores have p2align values"); + } +} + +/// The operand number of the load or store address in load/store instructions. +static const unsigned MemOpAddressOperandNo = 2; +/// The operand number of the stored value in a store instruction. 
+static const unsigned StoreValueOperandNo = 4; + +} // end namespace WebAssembly +} // end namespace llvm + #endif diff --git a/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp index 1d2822869a1..3d61c15717b 100644 --- a/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp +++ b/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp @@ -16,12 +16,10 @@ #include "WebAssemblyTargetStreamer.h" #include "InstPrinter/WebAssemblyInstPrinter.h" #include "WebAssemblyMCTargetDesc.h" -#include "WebAssemblyTargetObjectFile.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbolELF.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" @@ -66,6 +64,16 @@ void WebAssemblyTargetAsmStreamer::emitLocal(ArrayRef<MVT> Types) { void WebAssemblyTargetAsmStreamer::emitEndFunc() { OS << "\t.endfunc\n"; } +void WebAssemblyTargetAsmStreamer::emitIndirectFunctionType( + StringRef name, SmallVectorImpl<MVT> &SignatureVTs, size_t NumResults) { + OS << "\t.functype\t" << name; + if (NumResults == 0) OS << ", void"; + for (auto Ty : SignatureVTs) { + OS << ", " << WebAssembly::TypeToString(Ty); + } + OS << "\n"; +} + // FIXME: What follows is not the real binary encoding. static void EncodeTypes(MCStreamer &Streamer, ArrayRef<MVT> Types) { diff --git a/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h index c66a51574ef..51354ef22d7 100644 --- a/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h +++ b/gnu/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h @@ -37,6 +37,12 @@ public: virtual void emitLocal(ArrayRef<MVT> Types) = 0; /// .endfunc virtual void emitEndFunc() = 0; + /// .functype + virtual void emitIndirectFunctionType(StringRef name, + SmallVectorImpl<MVT> &SignatureVTs, + size_t NumResults) { + llvm_unreachable("emitIndirectFunctionType not implemented"); + } }; /// This part is for ascii assembly output @@ -50,6 +56,9 @@ public: void emitResult(ArrayRef<MVT> Types) override; void emitLocal(ArrayRef<MVT> Types) override; void emitEndFunc() override; + void emitIndirectFunctionType(StringRef name, + SmallVectorImpl<MVT> &SignatureVTs, + size_t NumResults) override; }; /// This part is for ELF object output diff --git a/gnu/llvm/lib/Target/WebAssembly/README.txt b/gnu/llvm/lib/Target/WebAssembly/README.txt index b97ea454165..a6c2eefc057 100644 --- a/gnu/llvm/lib/Target/WebAssembly/README.txt +++ b/gnu/llvm/lib/Target/WebAssembly/README.txt @@ -13,32 +13,18 @@ binary encoding of WebAssembly itself: * https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md The backend is built, tested and archived on the following waterfall: - https://build.chromium.org/p/client.wasm.llvm/console + https://wasm-stat.us The backend's bringup is done using the GCC torture test suite first since it doesn't require C library support. Current known failures are in known_gcc_test_failures.txt, all other tests should pass. The waterfall will turn red if not. Once most of these pass, further testing will use LLVM's own test suite. 
The tests can be run locally using: - github.com/WebAssembly/experimental/blob/master/buildbot/torture_test.py - -Interesting work that remains to be done: -* Write a pass to restructurize irreducible control flow. This needs to be done - before register allocation to be efficient, because it may duplicate basic - blocks and WebAssembly performs register allocation at a whole-function - level. Note that LLVM's GPU code has such a pass, but it linearizes control - flow (e.g. both sides of branches execute and are masked) which is undesirable - for WebAssembly. + https://github.com/WebAssembly/waterfall/blob/master/src/compile_torture_tests.py //===---------------------------------------------------------------------===// -set_local instructions have a return value. We should (a) model this, -and (b) write optimizations which take advantage of it. Keep in mind that -many set_local instructions are implicit! - -//===---------------------------------------------------------------------===// - -Br, br_if, and tableswitch instructions can support having a value on the +Br, br_if, and br_table instructions can support having a value on the expression stack across the jump (sometimes). We should (a) model this, and (b) extend the stackifier to utilize it. @@ -58,10 +44,6 @@ us too? //===---------------------------------------------------------------------===// -When is it profitable to set isAsCheapAsAMove on instructions in WebAssembly? - -//===---------------------------------------------------------------------===// - Register stackification uses the EXPR_STACK physical register to impose ordering dependencies on instructions with stack operands. This is pessimistic; we should consider alternate ways to model stack dependencies. @@ -82,7 +64,74 @@ stores. //===---------------------------------------------------------------------===// -Memset/memcpy/memmove should be marked with the "returned" attribute somehow, -even when they are translated through intrinsics. +Consider implementing optimizeSelect, optimizeCompareInstr, optimizeCondBranch, +optimizeLoadInstr, and/or getMachineCombinerPatterns. + +//===---------------------------------------------------------------------===// + +Find a clean way to fix the problem which leads to the Shrink Wrapping pass +being run after the WebAssembly PEI pass. + +//===---------------------------------------------------------------------===// + +When setting multiple local variables to the same constant, we currently get +code like this: + + i32.const $4=, 0 + i32.const $3=, 0 + +It could be done with a smaller encoding like this: + + i32.const $push5=, 0 + tee_local $push6=, $4=, $pop5 + copy_local $3=, $pop6 + +//===---------------------------------------------------------------------===// + +WebAssembly registers are implicitly initialized to zero. Explicit zeroing is +therefore often redundant and could be optimized away. + +//===---------------------------------------------------------------------===// + +Small indices may use smaller encodings than large indices. +WebAssemblyRegColoring and/or WebAssemblyRegRenumbering should sort registers +according to their usage frequency to maximize the usage of smaller encodings. + +//===---------------------------------------------------------------------===// + +When the last statement in a function body computes the return value, it can +just let that value be the exit value of the outermost block, rather than +needing an explicit return operation. 
+ +//===---------------------------------------------------------------------===// + +Many cases of irreducible control flow could be transformed more optimally +than via the transform in WebAssemblyFixIrreducibleControlFlow.cpp. + +It may also be worthwhile to do transforms before register coloring, +particularly when duplicating code, to allow register coloring to be aware of +the duplication. + +//===---------------------------------------------------------------------===// + +WebAssemblyRegStackify could use AliasAnalysis to reorder loads and stores more +aggressively. + +//===---------------------------------------------------------------------===// + +WebAssemblyRegStackify is currently a greedy algorithm. This means that, for +example, a binary operator will stackify with its user before its operands. +However, if moving the binary operator to its user moves it to a place where +its operands can't be moved to, it would be better to leave it in place, or +perhaps move it up, so that it can stackify its operands. A binary operator +has two operands and one result, so in such cases there could be a net win by +prefering the operands. + +//===---------------------------------------------------------------------===// + +Instruction ordering has a significant influence on register stackification and +coloring. Consider experimenting with the MachineScheduler (enable via +enableMachineScheduler) and determine if it can be configured to schedule +instructions advantageously for this purpose. //===---------------------------------------------------------------------===// diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssembly.h b/gnu/llvm/lib/Target/WebAssembly/WebAssembly.h index e972da5af74..957f31cae22 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssembly.h +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssembly.h @@ -23,23 +23,28 @@ namespace llvm { class WebAssemblyTargetMachine; class FunctionPass; +// LLVM IR passes. FunctionPass *createWebAssemblyOptimizeReturned(); +// ISel and immediate followup passes. FunctionPass *createWebAssemblyISelDag(WebAssemblyTargetMachine &TM, CodeGenOpt::Level OptLevel); FunctionPass *createWebAssemblyArgumentMove(); +FunctionPass *createWebAssemblySetP2AlignOperands(); +// Late passes. +FunctionPass *createWebAssemblyReplacePhysRegs(); +FunctionPass *createWebAssemblyPrepareForLiveIntervals(); +FunctionPass *createWebAssemblyOptimizeLiveIntervals(); FunctionPass *createWebAssemblyStoreResults(); FunctionPass *createWebAssemblyRegStackify(); FunctionPass *createWebAssemblyRegColoring(); -FunctionPass *createWebAssemblyPEI(); +FunctionPass *createWebAssemblyFixIrreducibleControlFlow(); FunctionPass *createWebAssemblyCFGStackify(); FunctionPass *createWebAssemblyLowerBrUnless(); FunctionPass *createWebAssemblyRegNumbering(); FunctionPass *createWebAssemblyPeephole(); -FunctionPass *createWebAssemblyRelooper(); - } // end namespace llvm #endif diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp index 3893c408cf6..5887f45371f 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp @@ -65,8 +65,8 @@ FunctionPass *llvm::createWebAssemblyArgumentMove() { } /// Test whether the given instruction is an ARGUMENT. 
-static bool IsArgument(const MachineInstr *MI) { - switch (MI->getOpcode()) { +static bool IsArgument(const MachineInstr &MI) { + switch (MI.getOpcode()) { case WebAssembly::ARGUMENT_I32: case WebAssembly::ARGUMENT_I64: case WebAssembly::ARGUMENT_F32: @@ -88,20 +88,18 @@ bool WebAssemblyArgumentMove::runOnMachineFunction(MachineFunction &MF) { MachineBasicBlock::iterator InsertPt = EntryMBB.end(); // Look for the first NonArg instruction. - for (auto MII = EntryMBB.begin(), MIE = EntryMBB.end(); MII != MIE; ++MII) { - MachineInstr *MI = MII; + for (MachineInstr &MI : EntryMBB) { if (!IsArgument(MI)) { - InsertPt = MII; + InsertPt = MI; break; } } // Now move any argument instructions later in the block // to before our first NonArg instruction. - for (auto I = InsertPt, E = EntryMBB.end(); I != E; ++I) { - MachineInstr *MI = I; + for (MachineInstr &MI : llvm::make_range(InsertPt, EntryMBB.end())) { if (IsArgument(MI)) { - EntryMBB.insert(InsertPt, MI->removeFromParent()); + EntryMBB.insert(InsertPt, MI.removeFromParent()); Changed = true; } } diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index 45ac99d90ed..54e9f7f5290 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -67,6 +67,7 @@ private: // AsmPrinter Implementation. //===------------------------------------------------------------------===// + void EmitEndOfAsmFile(Module &M) override; void EmitJumpTableInfo() override; void EmitConstantPool() override; void EmitFunctionBodyStart() override; @@ -93,10 +94,7 @@ private: //===----------------------------------------------------------------------===// MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const { - const TargetRegisterClass *TRC = - TargetRegisterInfo::isVirtualRegister(RegNo) - ? MRI->getRegClass(RegNo) - : MRI->getTargetRegisterInfo()->getMinimalPhysRegClass(RegNo); + const TargetRegisterClass *TRC = MRI->getRegClass(RegNo); for (MVT T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) if (TRC->hasType(T)) return T; @@ -119,8 +117,7 @@ std::string WebAssemblyAsmPrinter::regToString(const MachineOperand &MO) { return '$' + utostr(WAReg); } -WebAssemblyTargetStreamer * -WebAssemblyAsmPrinter::getTargetStreamer() { +WebAssemblyTargetStreamer *WebAssemblyAsmPrinter::getTargetStreamer() { MCTargetStreamer *TS = OutStreamer->getTargetStreamer(); return static_cast<WebAssemblyTargetStreamer *>(TS); } @@ -128,16 +125,6 @@ WebAssemblyAsmPrinter::getTargetStreamer() { //===----------------------------------------------------------------------===// // WebAssemblyAsmPrinter Implementation. //===----------------------------------------------------------------------===// - -void WebAssemblyAsmPrinter::EmitConstantPool() { - assert(MF->getConstantPool()->getConstants().empty() && - "WebAssembly disables constant pools"); -} - -void WebAssemblyAsmPrinter::EmitJumpTableInfo() { - // Nothing to do; jump tables are incorporated into the instruction stream. 
-} - static void ComputeLegalValueVTs(const Function &F, const TargetMachine &TM, Type *Ty, SmallVectorImpl<MVT> &ValueVTs) { const DataLayout &DL(F.getParent()->getDataLayout()); @@ -154,6 +141,42 @@ static void ComputeLegalValueVTs(const Function &F, const TargetMachine &TM, } } +void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) { + for (const auto &F : M) { + // Emit function type info for all undefined functions + if (F.isDeclarationForLinker() && !F.isIntrinsic()) { + SmallVector<MVT, 4> SignatureVTs; + ComputeLegalValueVTs(F, TM, F.getReturnType(), SignatureVTs); + size_t NumResults = SignatureVTs.size(); + if (SignatureVTs.size() > 1) { + // WebAssembly currently can't lower returns of multiple values without + // demoting to sret (see WebAssemblyTargetLowering::CanLowerReturn). So + // replace multiple return values with a pointer parameter. + SignatureVTs.clear(); + SignatureVTs.push_back( + MVT::getIntegerVT(M.getDataLayout().getPointerSizeInBits())); + NumResults = 0; + } + + for (auto &Arg : F.args()) { + ComputeLegalValueVTs(F, TM, Arg.getType(), SignatureVTs); + } + + getTargetStreamer()->emitIndirectFunctionType(F.getName(), SignatureVTs, + NumResults); + } + } +} + +void WebAssemblyAsmPrinter::EmitConstantPool() { + assert(MF->getConstantPool()->getConstants().empty() && + "WebAssembly disables constant pools"); +} + +void WebAssemblyAsmPrinter::EmitJumpTableInfo() { + // Nothing to do; jump tables are incorporated into the instruction stream. +} + void WebAssemblyAsmPrinter::EmitFunctionBodyStart() { if (!MFI->getParams().empty()) getTargetStreamer()->emitParam(MFI->getParams()); @@ -184,13 +207,6 @@ void WebAssemblyAsmPrinter::EmitFunctionBodyStart() { LocalTypes.push_back(getRegType(VReg)); AnyWARegs = true; } - auto &PhysRegs = MFI->getPhysRegs(); - for (unsigned PReg = 0; PReg < PhysRegs.size(); ++PReg) { - if (PhysRegs[PReg] == -1U) - continue; - LocalTypes.push_back(getRegType(PReg)); - AnyWARegs = true; - } if (AnyWARegs) getTargetStreamer()->emitLocal(LocalTypes); @@ -212,6 +228,30 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) { // These represent values which are live into the function entry, so there's // no instruction to emit. break; + case WebAssembly::FALLTHROUGH_RETURN_I32: + case WebAssembly::FALLTHROUGH_RETURN_I64: + case WebAssembly::FALLTHROUGH_RETURN_F32: + case WebAssembly::FALLTHROUGH_RETURN_F64: { + // These instructions represent the implicit return at the end of a + // function body. The operand is always a pop. + assert(MFI->isVRegStackified(MI->getOperand(0).getReg())); + + if (isVerbose()) { + OutStreamer->AddComment("fallthrough-return: $pop" + + utostr(MFI->getWARegStackId( + MFI->getWAReg(MI->getOperand(0).getReg())))); + OutStreamer->AddBlankLine(); + } + break; + } + case WebAssembly::FALLTHROUGH_RETURN_VOID: + // This instruction represents the implicit return at the end of a + // function body with no return value. + if (isVerbose()) { + OutStreamer->AddComment("fallthrough-return"); + OutStreamer->AddBlankLine(); + } + break; default: { WebAssemblyMCInstLower MCInstLowering(OutContext, *this); MCInst TmpInst; diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp index a39349c562f..33166f5b554 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp @@ -10,10 +10,10 @@ /// \file /// \brief This file implements a CFG stacking pass. 
/// -/// This pass reorders the blocks in a function to put them into a reverse -/// post-order [0], with special care to keep the order as similar as possible -/// to the original order, and to keep loops contiguous even in the case of -/// split backedges. +/// This pass reorders the blocks in a function to put them into topological +/// order, ignoring loop backedges, and without any loop being interrupted +/// by a block not dominated by the loop header, with special care to keep the +/// order as similar as possible to the original order. /// /// Then, it inserts BLOCK and LOOP markers to mark the start of scopes, since /// scope boundaries serve as the labels for WebAssembly's control transfers. @@ -21,14 +21,13 @@ /// This is sufficient to convert arbitrary CFGs into a form that works on /// WebAssembly, provided that all loops are single-entry. /// -/// [0] https://en.wikipedia.org/wiki/Depth-first_search#Vertex_orderings -/// //===----------------------------------------------------------------------===// #include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" -#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/PriorityQueue.h" #include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" @@ -70,90 +69,6 @@ FunctionPass *llvm::createWebAssemblyCFGStackify() { return new WebAssemblyCFGStackify(); } -static void EliminateMultipleEntryLoops(MachineFunction &MF, - const MachineLoopInfo &MLI) { - SmallPtrSet<MachineBasicBlock *, 8> InSet; - for (scc_iterator<MachineFunction *> I = scc_begin(&MF), E = scc_end(&MF); - I != E; ++I) { - const std::vector<MachineBasicBlock *> &CurrentSCC = *I; - - // Skip trivial SCCs. - if (CurrentSCC.size() == 1) - continue; - - InSet.insert(CurrentSCC.begin(), CurrentSCC.end()); - MachineBasicBlock *Header = nullptr; - for (MachineBasicBlock *MBB : CurrentSCC) { - for (MachineBasicBlock *Pred : MBB->predecessors()) { - if (InSet.count(Pred)) - continue; - if (!Header) { - Header = MBB; - break; - } - // TODO: Implement multiple-entry loops. - report_fatal_error("multiple-entry loops are not supported yet"); - } - } - assert(MLI.isLoopHeader(Header)); - - InSet.clear(); - } -} - -namespace { -/// Post-order traversal stack entry. -struct POStackEntry { - MachineBasicBlock *MBB; - SmallVector<MachineBasicBlock *, 0> Succs; - - POStackEntry(MachineBasicBlock *MBB, MachineFunction &MF, - const MachineLoopInfo &MLI); -}; -} // end anonymous namespace - -static bool LoopContains(const MachineLoop *Loop, - const MachineBasicBlock *MBB) { - return Loop ? Loop->contains(MBB) : true; -} - -POStackEntry::POStackEntry(MachineBasicBlock *MBB, MachineFunction &MF, - const MachineLoopInfo &MLI) - : MBB(MBB), Succs(MBB->successors()) { - // RPO is not a unique form, since at every basic block with multiple - // successors, the DFS has to pick which order to visit the successors in. - // Sort them strategically (see below). - MachineLoop *Loop = MLI.getLoopFor(MBB); - MachineFunction::iterator Next = next(MachineFunction::iterator(MBB)); - MachineBasicBlock *LayoutSucc = Next == MF.end() ? nullptr : &*Next; - std::stable_sort( - Succs.begin(), Succs.end(), - [=, &MLI](const MachineBasicBlock *A, const MachineBasicBlock *B) { - if (A == B) - return false; - - // Keep loops contiguous by preferring the block that's in the same - // loop. 
- bool LoopContainsA = LoopContains(Loop, A); - bool LoopContainsB = LoopContains(Loop, B); - if (LoopContainsA && !LoopContainsB) - return true; - if (!LoopContainsA && LoopContainsB) - return false; - - // Minimize perturbation by preferring the block which is the immediate - // layout successor. - if (A == LayoutSucc) - return true; - if (B == LayoutSucc) - return false; - - // TODO: More sophisticated orderings may be profitable here. - - return false; - }); -} - /// Return the "bottom" block of a loop. This differs from /// MachineLoop::getBottomBlock in that it works even if the loop is /// discontiguous. @@ -165,53 +80,166 @@ static MachineBasicBlock *LoopBottom(const MachineLoop *Loop) { return Bottom; } -/// Sort the blocks in RPO, taking special care to make sure that loops are -/// contiguous even in the case of split backedges. -/// -/// TODO: Determine whether RPO is actually worthwhile, or whether we should -/// move to just a stable-topological-sort-based approach that would preserve -/// more of the original order. -static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI) { - // Note that we do our own RPO rather than using - // "llvm/ADT/PostOrderIterator.h" because we want control over the order that - // successors are visited in (see above). Also, we can sort the blocks in the - // MachineFunction as we go. - SmallPtrSet<MachineBasicBlock *, 16> Visited; - SmallVector<POStackEntry, 16> Stack; - - MachineBasicBlock *EntryBlock = &*MF.begin(); - Visited.insert(EntryBlock); - Stack.push_back(POStackEntry(EntryBlock, MF, MLI)); - - for (;;) { - POStackEntry &Entry = Stack.back(); - SmallVectorImpl<MachineBasicBlock *> &Succs = Entry.Succs; - if (!Succs.empty()) { - MachineBasicBlock *Succ = Succs.pop_back_val(); - if (Visited.insert(Succ).second) - Stack.push_back(POStackEntry(Succ, MF, MLI)); - continue; - } +static void MaybeUpdateTerminator(MachineBasicBlock *MBB) { +#ifndef NDEBUG + bool AnyBarrier = false; +#endif + bool AllAnalyzable = true; + for (const MachineInstr &Term : MBB->terminators()) { +#ifndef NDEBUG + AnyBarrier |= Term.isBarrier(); +#endif + AllAnalyzable &= Term.isBranch() && !Term.isIndirectBranch(); + } + assert((AnyBarrier || AllAnalyzable) && + "AnalyzeBranch needs to analyze any block with a fallthrough"); + if (AllAnalyzable) + MBB->updateTerminator(); +} - // Put the block in its position in the MachineFunction. - MachineBasicBlock &MBB = *Entry.MBB; - MBB.moveBefore(&*MF.begin()); - - // Branch instructions may utilize a fallthrough, so update them if a - // fallthrough has been added or removed. - if (!MBB.empty() && MBB.back().isTerminator() && !MBB.back().isBranch() && - !MBB.back().isBarrier()) - report_fatal_error( - "Non-branch terminator with fallthrough cannot yet be rewritten"); - if (MBB.empty() || !MBB.back().isTerminator() || MBB.back().isBranch()) - MBB.updateTerminator(); - - Stack.pop_back(); - if (Stack.empty()) - break; +namespace { +/// Sort blocks by their number. +struct CompareBlockNumbers { + bool operator()(const MachineBasicBlock *A, + const MachineBasicBlock *B) const { + return A->getNumber() > B->getNumber(); + } +}; +/// Sort blocks by their number in the opposite order.. +struct CompareBlockNumbersBackwards { + bool operator()(const MachineBasicBlock *A, + const MachineBasicBlock *B) const { + return A->getNumber() < B->getNumber(); } +}; +/// Bookkeeping for a loop to help ensure that we don't mix blocks not dominated +/// by the loop header among the loop's blocks. 
+struct Entry { + const MachineLoop *Loop; + unsigned NumBlocksLeft; + + /// List of blocks not dominated by Loop's header that are deferred until + /// after all of Loop's blocks have been seen. + std::vector<MachineBasicBlock *> Deferred; + + explicit Entry(const MachineLoop *L) + : Loop(L), NumBlocksLeft(L->getNumBlocks()) {} +}; +} - // Now that we've sorted the blocks in RPO, renumber them. +/// Sort the blocks, taking special care to make sure that loops are not +/// interrupted by blocks not dominated by their header. +/// TODO: There are many opportunities for improving the heuristics here. +/// Explore them. +static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI, + const MachineDominatorTree &MDT) { + // Prepare for a topological sort: Record the number of predecessors each + // block has, ignoring loop backedges. + MF.RenumberBlocks(); + SmallVector<unsigned, 16> NumPredsLeft(MF.getNumBlockIDs(), 0); + for (MachineBasicBlock &MBB : MF) { + unsigned N = MBB.pred_size(); + if (MachineLoop *L = MLI.getLoopFor(&MBB)) + if (L->getHeader() == &MBB) + for (const MachineBasicBlock *Pred : MBB.predecessors()) + if (L->contains(Pred)) + --N; + NumPredsLeft[MBB.getNumber()] = N; + } + + // Topological sort the CFG, with additional constraints: + // - Between a loop header and the last block in the loop, there can be + // no blocks not dominated by the loop header. + // - It's desirable to preserve the original block order when possible. + // We use two ready lists; Preferred and Ready. Preferred has recently + // processed sucessors, to help preserve block sequences from the original + // order. Ready has the remaining ready blocks. + PriorityQueue<MachineBasicBlock *, std::vector<MachineBasicBlock *>, + CompareBlockNumbers> + Preferred; + PriorityQueue<MachineBasicBlock *, std::vector<MachineBasicBlock *>, + CompareBlockNumbersBackwards> + Ready; + SmallVector<Entry, 4> Loops; + for (MachineBasicBlock *MBB = &MF.front();;) { + const MachineLoop *L = MLI.getLoopFor(MBB); + if (L) { + // If MBB is a loop header, add it to the active loop list. We can't put + // any blocks that it doesn't dominate until we see the end of the loop. + if (L->getHeader() == MBB) + Loops.push_back(Entry(L)); + // For each active loop the block is in, decrement the count. If MBB is + // the last block in an active loop, take it off the list and pick up any + // blocks deferred because the header didn't dominate them. + for (Entry &E : Loops) + if (E.Loop->contains(MBB) && --E.NumBlocksLeft == 0) + for (auto DeferredBlock : E.Deferred) + Ready.push(DeferredBlock); + while (!Loops.empty() && Loops.back().NumBlocksLeft == 0) + Loops.pop_back(); + } + // The main topological sort logic. + for (MachineBasicBlock *Succ : MBB->successors()) { + // Ignore backedges. + if (MachineLoop *SuccL = MLI.getLoopFor(Succ)) + if (SuccL->getHeader() == Succ && SuccL->contains(MBB)) + continue; + // Decrement the predecessor count. If it's now zero, it's ready. + if (--NumPredsLeft[Succ->getNumber()] == 0) + Preferred.push(Succ); + } + // Determine the block to follow MBB. First try to find a preferred block, + // to preserve the original block order when possible. + MachineBasicBlock *Next = nullptr; + while (!Preferred.empty()) { + Next = Preferred.top(); + Preferred.pop(); + // If X isn't dominated by the top active loop header, defer it until that + // loop is done. 
+ if (!Loops.empty() && + !MDT.dominates(Loops.back().Loop->getHeader(), Next)) { + Loops.back().Deferred.push_back(Next); + Next = nullptr; + continue; + } + // If Next was originally ordered before MBB, and it isn't because it was + // loop-rotated above the header, it's not preferred. + if (Next->getNumber() < MBB->getNumber() && + (!L || !L->contains(Next) || + L->getHeader()->getNumber() < Next->getNumber())) { + Ready.push(Next); + Next = nullptr; + continue; + } + break; + } + // If we didn't find a suitable block in the Preferred list, check the + // general Ready list. + if (!Next) { + // If there are no more blocks to process, we're done. + if (Ready.empty()) { + MaybeUpdateTerminator(MBB); + break; + } + for (;;) { + Next = Ready.top(); + Ready.pop(); + // If Next isn't dominated by the top active loop header, defer it until + // that loop is done. + if (!Loops.empty() && + !MDT.dominates(Loops.back().Loop->getHeader(), Next)) { + Loops.back().Deferred.push_back(Next); + continue; + } + break; + } + } + // Move the next block into place and iterate. + Next->moveAfter(MBB); + MaybeUpdateTerminator(MBB); + MBB = Next; + } + assert(Loops.empty() && "Active loop list not finished"); MF.RenumberBlocks(); #ifndef NDEBUG @@ -266,12 +294,26 @@ static bool ExplicitlyBranchesTo(MachineBasicBlock *Pred, return false; } +/// Test whether MI is a child of some other node in an expression tree. +static bool IsChild(const MachineInstr &MI, + const WebAssemblyFunctionInfo &MFI) { + if (MI.getNumOperands() == 0) + return false; + const MachineOperand &MO = MI.getOperand(0); + if (!MO.isReg() || MO.isImplicit() || !MO.isDef()) + return false; + unsigned Reg = MO.getReg(); + return TargetRegisterInfo::isVirtualRegister(Reg) && + MFI.isVRegStackified(Reg); +} + /// Insert a BLOCK marker for branches to MBB (if needed). static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF, SmallVectorImpl<MachineBasicBlock *> &ScopeTops, const WebAssemblyInstrInfo &TII, const MachineLoopInfo &MLI, - MachineDominatorTree &MDT) { + MachineDominatorTree &MDT, + WebAssemblyFunctionInfo &MFI) { // First compute the nearest common dominator of all forward non-fallthrough // predecessors so that we minimize the time that the BLOCK is on the stack, // which reduces overall stack height. @@ -319,14 +361,15 @@ static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF, MachineLoop *HeaderLoop = MLI.getLoopFor(Header); if (HeaderLoop && MBB.getNumber() > LoopBottom(HeaderLoop)->getNumber()) { // Header is the header of a loop that does not lexically contain MBB, so - // the BLOCK needs to be above the LOOP. + // the BLOCK needs to be above the LOOP, after any END constructs. InsertPos = Header->begin(); + while (InsertPos->getOpcode() != WebAssembly::LOOP) + ++InsertPos; } else { // Otherwise, insert the BLOCK as late in Header as we can, but before the // beginning of the local expression tree and any nested BLOCKs. 
InsertPos = Header->getFirstTerminator(); - while (InsertPos != Header->begin() && - prev(InsertPos)->definesRegister(WebAssembly::EXPR_STACK) && + while (InsertPos != Header->begin() && IsChild(*prev(InsertPos), MFI) && prev(InsertPos)->getOpcode() != WebAssembly::LOOP && prev(InsertPos)->getOpcode() != WebAssembly::END_BLOCK && prev(InsertPos)->getOpcode() != WebAssembly::END_LOOP) @@ -388,7 +431,7 @@ static void PlaceLoopMarker( assert((!ScopeTops[AfterLoop->getNumber()] || ScopeTops[AfterLoop->getNumber()]->getNumber() < MBB.getNumber()) && - "With RPO we should visit the outer-most loop for a block first."); + "With block sorting the outermost loop for a block should be first."); if (!ScopeTops[AfterLoop->getNumber()]) ScopeTops[AfterLoop->getNumber()] = &MBB; } @@ -409,7 +452,8 @@ GetDepth(const SmallVectorImpl<const MachineBasicBlock *> &Stack, /// Insert LOOP and BLOCK markers at appropriate places. static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI, const WebAssemblyInstrInfo &TII, - MachineDominatorTree &MDT) { + MachineDominatorTree &MDT, + WebAssemblyFunctionInfo &MFI) { // For each block whose label represents the end of a scope, record the block // which holds the beginning of the scope. This will allow us to quickly skip // over scoped regions when walking blocks. We allocate one more than the @@ -425,7 +469,7 @@ static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI, PlaceLoopMarker(MBB, MF, ScopeTops, LoopTops, TII, MLI); // Place the BLOCK for MBB if MBB is branched to from above. - PlaceBlockMarker(MBB, MF, ScopeTops, TII, MLI, MDT); + PlaceBlockMarker(MBB, MF, ScopeTops, TII, MLI, MDT, MFI); } // Now rewrite references to basic blocks to be depth immediates. @@ -478,16 +522,14 @@ bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) { auto &MDT = getAnalysis<MachineDominatorTree>(); // Liveness is not tracked for EXPR_STACK physreg. const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); MF.getRegInfo().invalidateLiveness(); - // RPO sorting needs all loops to be single-entry. - EliminateMultipleEntryLoops(MF, MLI); - - // Sort the blocks in RPO, with contiguous loops. - SortBlocks(MF, MLI); + // Sort the blocks, with contiguous loops. + SortBlocks(MF, MLI, MDT); // Place the BLOCK and LOOP markers to indicate the beginnings of scopes. - PlaceMarkers(MF, MLI, TII, MDT); + PlaceMarkers(MF, MLI, TII, MDT, MFI); return true; } diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index 1b761b1a9d7..7bfa4074849 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -12,10 +12,13 @@ /// class. Some of the target-specific code is generated by tablegen in the file /// WebAssemblyGenFastISel.inc, which is #included here. /// +/// TODO: kill flags +/// //===----------------------------------------------------------------------===// #include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "WebAssemblyTargetMachine.h" #include "llvm/Analysis/BranchProbabilityInfo.h" @@ -41,13 +44,122 @@ using namespace llvm; namespace { class WebAssemblyFastISel final : public FastISel { + // All possible address modes. 
+ class Address { + public: + typedef enum { RegBase, FrameIndexBase } BaseKind; + + private: + BaseKind Kind; + union { + unsigned Reg; + int FI; + } Base; + + int64_t Offset; + + const GlobalValue *GV; + + public: + // Innocuous defaults for our address. + Address() : Kind(RegBase), Offset(0), GV(0) { Base.Reg = 0; } + void setKind(BaseKind K) { Kind = K; } + BaseKind getKind() const { return Kind; } + bool isRegBase() const { return Kind == RegBase; } + bool isFIBase() const { return Kind == FrameIndexBase; } + void setReg(unsigned Reg) { + assert(isRegBase() && "Invalid base register access!"); + Base.Reg = Reg; + } + unsigned getReg() const { + assert(isRegBase() && "Invalid base register access!"); + return Base.Reg; + } + void setFI(unsigned FI) { + assert(isFIBase() && "Invalid base frame index access!"); + Base.FI = FI; + } + unsigned getFI() const { + assert(isFIBase() && "Invalid base frame index access!"); + return Base.FI; + } + + void setOffset(int64_t Offset_) { Offset = Offset_; } + int64_t getOffset() const { return Offset; } + void setGlobalValue(const GlobalValue *G) { GV = G; } + const GlobalValue *getGlobalValue() const { return GV; } + }; + /// Keep a pointer to the WebAssemblySubtarget around so that we can make the /// right decision when generating code for different targets. const WebAssemblySubtarget *Subtarget; LLVMContext *Context; - // Call handling routines. private: + // Utility helper routines + MVT::SimpleValueType getSimpleType(Type *Ty) { + EVT VT = TLI.getValueType(DL, Ty, /*HandleUnknown=*/true); + return VT.isSimple() ? VT.getSimpleVT().SimpleTy : + MVT::INVALID_SIMPLE_VALUE_TYPE; + } + MVT::SimpleValueType getLegalType(MVT::SimpleValueType VT) { + switch (VT) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + return MVT::i32; + case MVT::i32: + case MVT::i64: + case MVT::f32: + case MVT::f64: + return VT; + default: + break; + } + return MVT::INVALID_SIMPLE_VALUE_TYPE; + } + bool computeAddress(const Value *Obj, Address &Addr); + void materializeLoadStoreOperands(Address &Addr); + void addLoadStoreOperands(const Address &Addr, const MachineInstrBuilder &MIB, + MachineMemOperand *MMO); + unsigned maskI1Value(unsigned Reg, const Value *V); + unsigned getRegForI1Value(const Value *V, bool &Not); + unsigned zeroExtendToI32(unsigned Reg, const Value *V, + MVT::SimpleValueType From); + unsigned signExtendToI32(unsigned Reg, const Value *V, + MVT::SimpleValueType From); + unsigned zeroExtend(unsigned Reg, const Value *V, + MVT::SimpleValueType From, + MVT::SimpleValueType To); + unsigned signExtend(unsigned Reg, const Value *V, + MVT::SimpleValueType From, + MVT::SimpleValueType To); + unsigned getRegForUnsignedValue(const Value *V); + unsigned getRegForSignedValue(const Value *V); + unsigned getRegForPromotedValue(const Value *V, bool IsSigned); + unsigned notValue(unsigned Reg); + unsigned copyValue(unsigned Reg); + + // Backend specific FastISel code. + unsigned fastMaterializeAlloca(const AllocaInst *AI) override; + unsigned fastMaterializeConstant(const Constant *C) override; + bool fastLowerArguments() override; + + // Selection routines. 
+ bool selectCall(const Instruction *I); + bool selectSelect(const Instruction *I); + bool selectTrunc(const Instruction *I); + bool selectZExt(const Instruction *I); + bool selectSExt(const Instruction *I); + bool selectICmp(const Instruction *I); + bool selectFCmp(const Instruction *I); + bool selectBitCast(const Instruction *I); + bool selectLoad(const Instruction *I); + bool selectStore(const Instruction *I); + bool selectBr(const Instruction *I); + bool selectRet(const Instruction *I); + bool selectUnreachable(const Instruction *I); + public: // Backend specific FastISel code. WebAssemblyFastISel(FunctionLoweringInfo &FuncInfo, @@ -64,11 +176,1001 @@ public: } // end anonymous namespace +bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) { + + const User *U = nullptr; + unsigned Opcode = Instruction::UserOp1; + if (const Instruction *I = dyn_cast<Instruction>(Obj)) { + // Don't walk into other basic blocks unless the object is an alloca from + // another block, otherwise it may not have a virtual register assigned. + if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) || + FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { + Opcode = I->getOpcode(); + U = I; + } + } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) { + Opcode = C->getOpcode(); + U = C; + } + + if (auto *Ty = dyn_cast<PointerType>(Obj->getType())) + if (Ty->getAddressSpace() > 255) + // Fast instruction selection doesn't support the special + // address spaces. + return false; + + if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) { + if (Addr.getGlobalValue()) + return false; + Addr.setGlobalValue(GV); + return true; + } + + switch (Opcode) { + default: + break; + case Instruction::BitCast: { + // Look through bitcasts. + return computeAddress(U->getOperand(0), Addr); + } + case Instruction::IntToPtr: { + // Look past no-op inttoptrs. + if (TLI.getValueType(DL, U->getOperand(0)->getType()) == + TLI.getPointerTy(DL)) + return computeAddress(U->getOperand(0), Addr); + break; + } + case Instruction::PtrToInt: { + // Look past no-op ptrtoints. + if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) + return computeAddress(U->getOperand(0), Addr); + break; + } + case Instruction::GetElementPtr: { + Address SavedAddr = Addr; + uint64_t TmpOffset = Addr.getOffset(); + // Iterate through the GEP folding the constants into offsets where + // we can. + for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U); + GTI != E; ++GTI) { + const Value *Op = GTI.getOperand(); + if (StructType *STy = dyn_cast<StructType>(*GTI)) { + const StructLayout *SL = DL.getStructLayout(STy); + unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); + TmpOffset += SL->getElementOffset(Idx); + } else { + uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); + for (;;) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { + // Constant-offset addressing. + TmpOffset += CI->getSExtValue() * S; + break; + } + if (S == 1 && Addr.isRegBase() && Addr.getReg() == 0) { + // An unscaled add of a register. Set it as the new base. + Addr.setReg(getRegForValue(Op)); + break; + } + if (canFoldAddIntoGEP(U, Op)) { + // A compatible add with a constant operand. Fold the constant. + ConstantInt *CI = + cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); + TmpOffset += CI->getSExtValue() * S; + // Iterate on the other operand. 
+ Op = cast<AddOperator>(Op)->getOperand(0); + continue; + } + // Unsupported + goto unsupported_gep; + } + } + } + // Try to grab the base operand now. + Addr.setOffset(TmpOffset); + if (computeAddress(U->getOperand(0), Addr)) + return true; + // We failed, restore everything and try the other options. + Addr = SavedAddr; + unsupported_gep: + break; + } + case Instruction::Alloca: { + const AllocaInst *AI = cast<AllocaInst>(Obj); + DenseMap<const AllocaInst *, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) { + Addr.setKind(Address::FrameIndexBase); + Addr.setFI(SI->second); + return true; + } + break; + } + case Instruction::Add: { + // Adds of constants are common and easy enough. + const Value *LHS = U->getOperand(0); + const Value *RHS = U->getOperand(1); + + if (isa<ConstantInt>(LHS)) + std::swap(LHS, RHS); + + if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { + Addr.setOffset(Addr.getOffset() + CI->getSExtValue()); + return computeAddress(LHS, Addr); + } + + Address Backup = Addr; + if (computeAddress(LHS, Addr) && computeAddress(RHS, Addr)) + return true; + Addr = Backup; + + break; + } + case Instruction::Sub: { + // Subs of constants are common and easy enough. + const Value *LHS = U->getOperand(0); + const Value *RHS = U->getOperand(1); + + if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { + Addr.setOffset(Addr.getOffset() - CI->getSExtValue()); + return computeAddress(LHS, Addr); + } + break; + } + } + Addr.setReg(getRegForValue(Obj)); + return Addr.getReg() != 0; +} + +void WebAssemblyFastISel::materializeLoadStoreOperands(Address &Addr) { + if (Addr.isRegBase()) { + unsigned Reg = Addr.getReg(); + if (Reg == 0) { + Reg = createResultReg(Subtarget->hasAddr64() ? + &WebAssembly::I64RegClass : + &WebAssembly::I32RegClass); + unsigned Opc = Subtarget->hasAddr64() ? + WebAssembly::CONST_I64 : + WebAssembly::CONST_I32; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), Reg) + .addImm(0); + Addr.setReg(Reg); + } + } +} + +void WebAssemblyFastISel::addLoadStoreOperands(const Address &Addr, + const MachineInstrBuilder &MIB, + MachineMemOperand *MMO) { + if (const GlobalValue *GV = Addr.getGlobalValue()) + MIB.addGlobalAddress(GV, Addr.getOffset()); + else + MIB.addImm(Addr.getOffset()); + + if (Addr.isRegBase()) + MIB.addReg(Addr.getReg()); + else + MIB.addFrameIndex(Addr.getFI()); + + // Set the alignment operand (this is rewritten in SetP2AlignOperands). + // TODO: Disable SetP2AlignOperands for FastISel and just do it here. + MIB.addImm(0); + + MIB.addMemOperand(MMO); +} + +unsigned WebAssemblyFastISel::maskI1Value(unsigned Reg, const Value *V) { + return zeroExtendToI32(Reg, V, MVT::i1); +} + +unsigned WebAssemblyFastISel::getRegForI1Value(const Value *V, bool &Not) { + if (const ICmpInst *ICmp = dyn_cast<ICmpInst>(V)) + if (const ConstantInt *C = dyn_cast<ConstantInt>(ICmp->getOperand(1))) + if (ICmp->isEquality() && C->isZero() && C->getType()->isIntegerTy(32)) { + Not = ICmp->isTrueWhenEqual(); + return getRegForValue(ICmp->getOperand(0)); + } + + if (BinaryOperator::isNot(V)) { + Not = true; + return getRegForValue(BinaryOperator::getNotArgument(V)); + } + + Not = false; + return maskI1Value(getRegForValue(V), V); +} + +unsigned WebAssemblyFastISel::zeroExtendToI32(unsigned Reg, const Value *V, + MVT::SimpleValueType From) { + switch (From) { + case MVT::i1: + // If the value is naturally an i1, we don't need to mask it. + // TODO: Recursively examine selects, phis, and, or, xor, constants. 
+ if (From == MVT::i1 && V != nullptr) { + if (isa<CmpInst>(V) || + (isa<Argument>(V) && cast<Argument>(V)->hasZExtAttr())) + return copyValue(Reg); + } + case MVT::i8: + case MVT::i16: + break; + case MVT::i32: + return copyValue(Reg); + default: + return 0; + } + + unsigned Imm = createResultReg(&WebAssembly::I32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(WebAssembly::CONST_I32), Imm) + .addImm(~(~uint64_t(0) << MVT(From).getSizeInBits())); + + unsigned Result = createResultReg(&WebAssembly::I32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(WebAssembly::AND_I32), Result) + .addReg(Reg) + .addReg(Imm); + + return Result; +} + +unsigned WebAssemblyFastISel::signExtendToI32(unsigned Reg, const Value *V, + MVT::SimpleValueType From) { + switch (From) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + break; + case MVT::i32: + return copyValue(Reg); + default: + return 0; + } + + unsigned Imm = createResultReg(&WebAssembly::I32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(WebAssembly::CONST_I32), Imm) + .addImm(32 - MVT(From).getSizeInBits()); + + unsigned Left = createResultReg(&WebAssembly::I32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(WebAssembly::SHL_I32), Left) + .addReg(Reg) + .addReg(Imm); + + unsigned Right = createResultReg(&WebAssembly::I32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(WebAssembly::SHR_S_I32), Right) + .addReg(Left) + .addReg(Imm); + + return Right; +} + +unsigned WebAssemblyFastISel::zeroExtend(unsigned Reg, const Value *V, + MVT::SimpleValueType From, + MVT::SimpleValueType To) { + if (To == MVT::i64) { + if (From == MVT::i64) + return copyValue(Reg); + + Reg = zeroExtendToI32(Reg, V, From); + + unsigned Result = createResultReg(&WebAssembly::I64RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(WebAssembly::I64_EXTEND_U_I32), Result) + .addReg(Reg); + return Result; + } + + return zeroExtendToI32(Reg, V, From); +} + +unsigned WebAssemblyFastISel::signExtend(unsigned Reg, const Value *V, + MVT::SimpleValueType From, + MVT::SimpleValueType To) { + if (To == MVT::i64) { + if (From == MVT::i64) + return copyValue(Reg); + + Reg = signExtendToI32(Reg, V, From); + + unsigned Result = createResultReg(&WebAssembly::I64RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(WebAssembly::I64_EXTEND_S_I32), Result) + .addReg(Reg); + return Result; + } + + return signExtendToI32(Reg, V, From); +} + +unsigned WebAssemblyFastISel::getRegForUnsignedValue(const Value *V) { + MVT::SimpleValueType From = getSimpleType(V->getType()); + MVT::SimpleValueType To = getLegalType(From); + return zeroExtend(getRegForValue(V), V, From, To); +} + +unsigned WebAssemblyFastISel::getRegForSignedValue(const Value *V) { + MVT::SimpleValueType From = getSimpleType(V->getType()); + MVT::SimpleValueType To = getLegalType(From); + return zeroExtend(getRegForValue(V), V, From, To); +} + +unsigned WebAssemblyFastISel::getRegForPromotedValue(const Value *V, + bool IsSigned) { + return IsSigned ? 
getRegForSignedValue(V) : + getRegForUnsignedValue(V); +} + +unsigned WebAssemblyFastISel::notValue(unsigned Reg) { + assert(MRI.getRegClass(Reg) == &WebAssembly::I32RegClass); + + unsigned NotReg = createResultReg(&WebAssembly::I32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(WebAssembly::EQZ_I32), NotReg) + .addReg(Reg); + return NotReg; +} + +unsigned WebAssemblyFastISel::copyValue(unsigned Reg) { + unsigned ResultReg = createResultReg(MRI.getRegClass(Reg)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(WebAssembly::COPY), ResultReg) + .addReg(Reg); + return ResultReg; +} + +unsigned WebAssemblyFastISel::fastMaterializeAlloca(const AllocaInst *AI) { + DenseMap<const AllocaInst *, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + + if (SI != FuncInfo.StaticAllocaMap.end()) { + unsigned ResultReg = createResultReg(Subtarget->hasAddr64() ? + &WebAssembly::I64RegClass : + &WebAssembly::I32RegClass); + unsigned Opc = Subtarget->hasAddr64() ? + WebAssembly::COPY_LOCAL_I64 : + WebAssembly::COPY_LOCAL_I32; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) + .addFrameIndex(SI->second); + return ResultReg; + } + + return 0; +} + +unsigned WebAssemblyFastISel::fastMaterializeConstant(const Constant *C) { + if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) { + unsigned ResultReg = createResultReg(Subtarget->hasAddr64() ? + &WebAssembly::I64RegClass : + &WebAssembly::I32RegClass); + unsigned Opc = Subtarget->hasAddr64() ? + WebAssembly::CONST_I64 : + WebAssembly::CONST_I32; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) + .addGlobalAddress(GV); + return ResultReg; + } + + // Let target-independent code handle it. + return 0; +} + +bool WebAssemblyFastISel::fastLowerArguments() { + if (!FuncInfo.CanLowerReturn) + return false; + + const Function *F = FuncInfo.Fn; + if (F->isVarArg()) + return false; + + unsigned i = 0; + for (auto const &Arg : F->args()) { + const AttributeSet &Attrs = F->getAttributes(); + if (Attrs.hasAttribute(i+1, Attribute::ByVal) || + Attrs.hasAttribute(i+1, Attribute::SwiftSelf) || + Attrs.hasAttribute(i+1, Attribute::SwiftError) || + Attrs.hasAttribute(i+1, Attribute::InAlloca) || + Attrs.hasAttribute(i+1, Attribute::Nest)) + return false; + + Type *ArgTy = Arg.getType(); + if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) + return false; + + unsigned Opc; + const TargetRegisterClass *RC; + switch (getSimpleType(ArgTy)) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + Opc = WebAssembly::ARGUMENT_I32; + RC = &WebAssembly::I32RegClass; + break; + case MVT::i64: + Opc = WebAssembly::ARGUMENT_I64; + RC = &WebAssembly::I64RegClass; + break; + case MVT::f32: + Opc = WebAssembly::ARGUMENT_F32; + RC = &WebAssembly::F32RegClass; + break; + case MVT::f64: + Opc = WebAssembly::ARGUMENT_F64; + RC = &WebAssembly::F64RegClass; + break; + default: + return false; + } + unsigned ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) + .addImm(i); + updateValueMap(&Arg, ResultReg); + + ++i; + } + + MRI.addLiveIn(WebAssembly::ARGUMENTS); + + auto *MFI = MF->getInfo<WebAssemblyFunctionInfo>(); + for (auto const &Arg : F->args()) + MFI->addParam(getLegalType(getSimpleType(Arg.getType()))); + + return true; +} + +bool WebAssemblyFastISel::selectCall(const Instruction *I) { + const CallInst *Call = cast<CallInst>(I); + + if (Call->isMustTailCall() || Call->isInlineAsm() || + 
Call->getFunctionType()->isVarArg()) + return false; + + Function *Func = Call->getCalledFunction(); + if (Func && Func->isIntrinsic()) + return false; + + FunctionType *FuncTy = Call->getFunctionType(); + unsigned Opc; + bool IsDirect = Func != nullptr; + bool IsVoid = FuncTy->getReturnType()->isVoidTy(); + unsigned ResultReg; + if (IsVoid) { + Opc = IsDirect ? WebAssembly::CALL_VOID : WebAssembly::CALL_INDIRECT_VOID; + } else { + MVT::SimpleValueType RetTy = getSimpleType(Call->getType()); + switch (RetTy) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + Opc = IsDirect ? WebAssembly::CALL_I32 : WebAssembly::CALL_INDIRECT_I32; + ResultReg = createResultReg(&WebAssembly::I32RegClass); + break; + case MVT::i64: + Opc = IsDirect ? WebAssembly::CALL_I64 : WebAssembly::CALL_INDIRECT_I64; + ResultReg = createResultReg(&WebAssembly::I64RegClass); + break; + case MVT::f32: + Opc = IsDirect ? WebAssembly::CALL_F32 : WebAssembly::CALL_INDIRECT_F32; + ResultReg = createResultReg(&WebAssembly::F32RegClass); + break; + case MVT::f64: + Opc = IsDirect ? WebAssembly::CALL_F64 : WebAssembly::CALL_INDIRECT_F64; + ResultReg = createResultReg(&WebAssembly::F64RegClass); + break; + default: + return false; + } + } + + SmallVector<unsigned, 8> Args; + for (unsigned i = 0, e = Call->getNumArgOperands(); i < e; ++i) { + Value *V = Call->getArgOperand(i); + MVT::SimpleValueType ArgTy = getSimpleType(V->getType()); + if (ArgTy == MVT::INVALID_SIMPLE_VALUE_TYPE) + return false; + + const AttributeSet &Attrs = Call->getAttributes(); + if (Attrs.hasAttribute(i+1, Attribute::ByVal) || + Attrs.hasAttribute(i+1, Attribute::SwiftSelf) || + Attrs.hasAttribute(i+1, Attribute::SwiftError) || + Attrs.hasAttribute(i+1, Attribute::InAlloca) || + Attrs.hasAttribute(i+1, Attribute::Nest)) + return false; + + unsigned Reg; + + if (Attrs.hasAttribute(i+1, Attribute::SExt)) + Reg = getRegForSignedValue(V); + else if (Attrs.hasAttribute(i+1, Attribute::ZExt)) + Reg = getRegForUnsignedValue(V); + else + Reg = getRegForValue(V); + + if (Reg == 0) + return false; + + Args.push_back(Reg); + } + + auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)); + + if (!IsVoid) + MIB.addReg(ResultReg, RegState::Define); + + if (IsDirect) + MIB.addGlobalAddress(Func); + else + MIB.addReg(getRegForValue(Call->getCalledValue())); + + for (unsigned ArgReg : Args) + MIB.addReg(ArgReg); + + if (!IsVoid) + updateValueMap(Call, ResultReg); + return true; +} + +bool WebAssemblyFastISel::selectSelect(const Instruction *I) { + const SelectInst *Select = cast<SelectInst>(I); + + bool Not; + unsigned CondReg = getRegForI1Value(Select->getCondition(), Not); + if (CondReg == 0) + return false; + + unsigned TrueReg = getRegForValue(Select->getTrueValue()); + if (TrueReg == 0) + return false; + + unsigned FalseReg = getRegForValue(Select->getFalseValue()); + if (FalseReg == 0) + return false; + + if (Not) + std::swap(TrueReg, FalseReg); + + unsigned Opc; + const TargetRegisterClass *RC; + switch (getSimpleType(Select->getType())) { + case MVT::i1: + case MVT::i8: + case MVT::i16: + case MVT::i32: + Opc = WebAssembly::SELECT_I32; + RC = &WebAssembly::I32RegClass; + break; + case MVT::i64: + Opc = WebAssembly::SELECT_I64; + RC = &WebAssembly::I64RegClass; + break; + case MVT::f32: + Opc = WebAssembly::SELECT_F32; + RC = &WebAssembly::F32RegClass; + break; + case MVT::f64: + Opc = WebAssembly::SELECT_F64; + RC = &WebAssembly::F64RegClass; + break; + default: + return false; + } + + unsigned ResultReg = createResultReg(RC); + 
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) + .addReg(TrueReg) + .addReg(FalseReg) + .addReg(CondReg); + + updateValueMap(Select, ResultReg); + return true; +} + +bool WebAssemblyFastISel::selectTrunc(const Instruction *I) { + const TruncInst *Trunc = cast<TruncInst>(I); + + unsigned Reg = getRegForValue(Trunc->getOperand(0)); + if (Reg == 0) + return false; + + if (Trunc->getOperand(0)->getType()->isIntegerTy(64)) { + unsigned Result = createResultReg(&WebAssembly::I32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(WebAssembly::I32_WRAP_I64), Result) + .addReg(Reg); + Reg = Result; + } + + updateValueMap(Trunc, Reg); + return true; +} + +bool WebAssemblyFastISel::selectZExt(const Instruction *I) { + const ZExtInst *ZExt = cast<ZExtInst>(I); + + const Value *Op = ZExt->getOperand(0); + MVT::SimpleValueType From = getSimpleType(Op->getType()); + MVT::SimpleValueType To = getLegalType(getSimpleType(ZExt->getType())); + unsigned Reg = zeroExtend(getRegForValue(Op), Op, From, To); + if (Reg == 0) + return false; + + updateValueMap(ZExt, Reg); + return true; +} + +bool WebAssemblyFastISel::selectSExt(const Instruction *I) { + const SExtInst *SExt = cast<SExtInst>(I); + + const Value *Op = SExt->getOperand(0); + MVT::SimpleValueType From = getSimpleType(Op->getType()); + MVT::SimpleValueType To = getLegalType(getSimpleType(SExt->getType())); + unsigned Reg = signExtend(getRegForValue(Op), Op, From, To); + if (Reg == 0) + return false; + + updateValueMap(SExt, Reg); + return true; +} + +bool WebAssemblyFastISel::selectICmp(const Instruction *I) { + const ICmpInst *ICmp = cast<ICmpInst>(I); + + bool I32 = getSimpleType(ICmp->getOperand(0)->getType()) != MVT::i64; + unsigned Opc; + bool isSigned = false; + switch (ICmp->getPredicate()) { + case ICmpInst::ICMP_EQ: + Opc = I32 ? WebAssembly::EQ_I32 : WebAssembly::EQ_I64; + break; + case ICmpInst::ICMP_NE: + Opc = I32 ? WebAssembly::NE_I32 : WebAssembly::NE_I64; + break; + case ICmpInst::ICMP_UGT: + Opc = I32 ? WebAssembly::GT_U_I32 : WebAssembly::GT_U_I64; + break; + case ICmpInst::ICMP_UGE: + Opc = I32 ? WebAssembly::GE_U_I32 : WebAssembly::GE_U_I64; + break; + case ICmpInst::ICMP_ULT: + Opc = I32 ? WebAssembly::LT_U_I32 : WebAssembly::LT_U_I64; + break; + case ICmpInst::ICMP_ULE: + Opc = I32 ? WebAssembly::LE_U_I32 : WebAssembly::LE_U_I64; + break; + case ICmpInst::ICMP_SGT: + Opc = I32 ? WebAssembly::GT_S_I32 : WebAssembly::GT_S_I64; + isSigned = true; + break; + case ICmpInst::ICMP_SGE: + Opc = I32 ? WebAssembly::GE_S_I32 : WebAssembly::GE_S_I64; + isSigned = true; + break; + case ICmpInst::ICMP_SLT: + Opc = I32 ? WebAssembly::LT_S_I32 : WebAssembly::LT_S_I64; + isSigned = true; + break; + case ICmpInst::ICMP_SLE: + Opc = I32 ? 
WebAssembly::LE_S_I32 : WebAssembly::LE_S_I64; + isSigned = true; + break; + default: return false; + } + + unsigned LHS = getRegForPromotedValue(ICmp->getOperand(0), isSigned); + if (LHS == 0) + return false; + + unsigned RHS = getRegForPromotedValue(ICmp->getOperand(1), isSigned); + if (RHS == 0) + return false; + + unsigned ResultReg = createResultReg(&WebAssembly::I32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) + .addReg(LHS) + .addReg(RHS); + updateValueMap(ICmp, ResultReg); + return true; +} + +bool WebAssemblyFastISel::selectFCmp(const Instruction *I) { + const FCmpInst *FCmp = cast<FCmpInst>(I); + + unsigned LHS = getRegForValue(FCmp->getOperand(0)); + if (LHS == 0) + return false; + + unsigned RHS = getRegForValue(FCmp->getOperand(1)); + if (RHS == 0) + return false; + + bool F32 = getSimpleType(FCmp->getOperand(0)->getType()) != MVT::f64; + unsigned Opc; + bool Not = false; + switch (FCmp->getPredicate()) { + case FCmpInst::FCMP_OEQ: + Opc = F32 ? WebAssembly::EQ_F32 : WebAssembly::EQ_F64; + break; + case FCmpInst::FCMP_UNE: + Opc = F32 ? WebAssembly::NE_F32 : WebAssembly::NE_F64; + break; + case FCmpInst::FCMP_OGT: + Opc = F32 ? WebAssembly::GT_F32 : WebAssembly::GT_F64; + break; + case FCmpInst::FCMP_OGE: + Opc = F32 ? WebAssembly::GE_F32 : WebAssembly::GE_F64; + break; + case FCmpInst::FCMP_OLT: + Opc = F32 ? WebAssembly::LT_F32 : WebAssembly::LT_F64; + break; + case FCmpInst::FCMP_OLE: + Opc = F32 ? WebAssembly::LE_F32 : WebAssembly::LE_F64; + break; + case FCmpInst::FCMP_UGT: + Opc = F32 ? WebAssembly::LE_F32 : WebAssembly::LE_F64; + Not = true; + break; + case FCmpInst::FCMP_UGE: + Opc = F32 ? WebAssembly::LT_F32 : WebAssembly::LT_F64; + Not = true; + break; + case FCmpInst::FCMP_ULT: + Opc = F32 ? WebAssembly::GE_F32 : WebAssembly::GE_F64; + Not = true; + break; + case FCmpInst::FCMP_ULE: + Opc = F32 ? WebAssembly::GT_F32 : WebAssembly::GT_F64; + Not = true; + break; + default: + return false; + } + + unsigned ResultReg = createResultReg(&WebAssembly::I32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) + .addReg(LHS) + .addReg(RHS); + + if (Not) + ResultReg = notValue(ResultReg); + + updateValueMap(FCmp, ResultReg); + return true; +} + +bool WebAssemblyFastISel::selectBitCast(const Instruction *I) { + // Target-independent code can handle this, except it doesn't set the dead + // flag on the ARGUMENTS clobber, so we have to do that manually in order + // to satisfy code that expects this of isBitcast() instructions. + EVT VT = TLI.getValueType(DL, I->getOperand(0)->getType()); + EVT RetVT = TLI.getValueType(DL, I->getType()); + if (!VT.isSimple() || !RetVT.isSimple()) + return false; + + if (VT == RetVT) { + // No-op bitcast. + updateValueMap(I, getRegForValue(I->getOperand(0))); + return true; + } + + unsigned Reg = fastEmit_ISD_BITCAST_r(VT.getSimpleVT(), RetVT.getSimpleVT(), + getRegForValue(I->getOperand(0)), + I->getOperand(0)->hasOneUse()); + if (!Reg) + return false; + MachineBasicBlock::iterator Iter = FuncInfo.InsertPt; + --Iter; + assert(Iter->isBitcast()); + Iter->setPhysRegsDeadExcept(ArrayRef<unsigned>(), TRI); + updateValueMap(I, Reg); + return true; +} + +bool WebAssemblyFastISel::selectLoad(const Instruction *I) { + const LoadInst *Load = cast<LoadInst>(I); + if (Load->isAtomic()) + return false; + + Address Addr; + if (!computeAddress(Load->getPointerOperand(), Addr)) + return false; + + // TODO: Fold a following sign-/zero-extend into the load instruction. 
+ + unsigned Opc; + const TargetRegisterClass *RC; + switch (getSimpleType(Load->getType())) { + case MVT::i1: + case MVT::i8: + Opc = WebAssembly::LOAD8_U_I32; + RC = &WebAssembly::I32RegClass; + break; + case MVT::i16: + Opc = WebAssembly::LOAD16_U_I32; + RC = &WebAssembly::I32RegClass; + break; + case MVT::i32: + Opc = WebAssembly::LOAD_I32; + RC = &WebAssembly::I32RegClass; + break; + case MVT::i64: + Opc = WebAssembly::LOAD_I64; + RC = &WebAssembly::I64RegClass; + break; + case MVT::f32: + Opc = WebAssembly::LOAD_F32; + RC = &WebAssembly::F32RegClass; + break; + case MVT::f64: + Opc = WebAssembly::LOAD_F64; + RC = &WebAssembly::F64RegClass; + break; + default: + return false; + } + + materializeLoadStoreOperands(Addr); + + unsigned ResultReg = createResultReg(RC); + auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), + ResultReg); + + addLoadStoreOperands(Addr, MIB, createMachineMemOperandFor(Load)); + + updateValueMap(Load, ResultReg); + return true; +} + +bool WebAssemblyFastISel::selectStore(const Instruction *I) { + const StoreInst *Store = cast<StoreInst>(I); + if (Store->isAtomic()) + return false; + + Address Addr; + if (!computeAddress(Store->getPointerOperand(), Addr)) + return false; + + unsigned Opc; + const TargetRegisterClass *RC; + bool VTIsi1 = false; + switch (getSimpleType(Store->getValueOperand()->getType())) { + case MVT::i1: + VTIsi1 = true; + case MVT::i8: + Opc = WebAssembly::STORE8_I32; + RC = &WebAssembly::I32RegClass; + break; + case MVT::i16: + Opc = WebAssembly::STORE16_I32; + RC = &WebAssembly::I32RegClass; + break; + case MVT::i32: + Opc = WebAssembly::STORE_I32; + RC = &WebAssembly::I32RegClass; + break; + case MVT::i64: + Opc = WebAssembly::STORE_I64; + RC = &WebAssembly::I64RegClass; + break; + case MVT::f32: + Opc = WebAssembly::STORE_F32; + RC = &WebAssembly::F32RegClass; + break; + case MVT::f64: + Opc = WebAssembly::STORE_F64; + RC = &WebAssembly::F64RegClass; + break; + default: return false; + } + + materializeLoadStoreOperands(Addr); + + unsigned ValueReg = getRegForValue(Store->getValueOperand()); + if (VTIsi1) + ValueReg = maskI1Value(ValueReg, Store->getValueOperand()); + + unsigned ResultReg = createResultReg(RC); + auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), + ResultReg); + + addLoadStoreOperands(Addr, MIB, createMachineMemOperandFor(Store)); + + MIB.addReg(ValueReg); + return true; +} + +bool WebAssemblyFastISel::selectBr(const Instruction *I) { + const BranchInst *Br = cast<BranchInst>(I); + if (Br->isUnconditional()) { + MachineBasicBlock *MSucc = FuncInfo.MBBMap[Br->getSuccessor(0)]; + fastEmitBranch(MSucc, Br->getDebugLoc()); + return true; + } + + MachineBasicBlock *TBB = FuncInfo.MBBMap[Br->getSuccessor(0)]; + MachineBasicBlock *FBB = FuncInfo.MBBMap[Br->getSuccessor(1)]; + + bool Not; + unsigned CondReg = getRegForI1Value(Br->getCondition(), Not); + + unsigned Opc = WebAssembly::BR_IF; + if (Not) + Opc = WebAssembly::BR_UNLESS; + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) + .addMBB(TBB) + .addReg(CondReg); + + finishCondBranch(Br->getParent(), TBB, FBB); + return true; +} + +bool WebAssemblyFastISel::selectRet(const Instruction *I) { + if (!FuncInfo.CanLowerReturn) + return false; + + const ReturnInst *Ret = cast<ReturnInst>(I); + + if (Ret->getNumOperands() == 0) { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(WebAssembly::RETURN_VOID)); + return true; + } + + Value *RV = Ret->getOperand(0); + unsigned Opc; + switch 
(getSimpleType(RV->getType())) { + case MVT::i1: case MVT::i8: + case MVT::i16: case MVT::i32: + Opc = WebAssembly::RETURN_I32; + break; + case MVT::i64: + Opc = WebAssembly::RETURN_I64; + break; + case MVT::f32: Opc = WebAssembly::RETURN_F32; break; + case MVT::f64: Opc = WebAssembly::RETURN_F64; break; + default: return false; + } + + unsigned Reg; + if (FuncInfo.Fn->getAttributes().hasAttribute(0, Attribute::SExt)) + Reg = getRegForSignedValue(RV); + else if (FuncInfo.Fn->getAttributes().hasAttribute(0, Attribute::ZExt)) + Reg = getRegForUnsignedValue(RV); + else + Reg = getRegForValue(RV); + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)).addReg(Reg); + return true; +} + +bool WebAssemblyFastISel::selectUnreachable(const Instruction *I) { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(WebAssembly::UNREACHABLE)); + return true; +} + bool WebAssemblyFastISel::fastSelectInstruction(const Instruction *I) { switch (I->getOpcode()) { - default: + case Instruction::Call: + if (selectCall(I)) + return true; break; - // TODO: add fast-isel selection cases here... + case Instruction::Select: return selectSelect(I); + case Instruction::Trunc: return selectTrunc(I); + case Instruction::ZExt: return selectZExt(I); + case Instruction::SExt: return selectSExt(I); + case Instruction::ICmp: return selectICmp(I); + case Instruction::FCmp: return selectFCmp(I); + case Instruction::BitCast: return selectBitCast(I); + case Instruction::Load: return selectLoad(I); + case Instruction::Store: return selectStore(I); + case Instruction::Br: return selectBr(I); + case Instruction::Ret: return selectRet(I); + case Instruction::Unreachable: return selectUnreachable(I); + default: break; } // Fall back to target-independent instruction selection. diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp new file mode 100644 index 00000000000..5dc90920e31 --- /dev/null +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp @@ -0,0 +1,296 @@ +//=- WebAssemblyFixIrreducibleControlFlow.cpp - Fix irreducible control flow -// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements a pass that transforms irreducible control flow +/// into reducible control flow. Irreducible control flow means multiple-entry +/// loops; they appear as CFG cycles that are not recorded in MachineLoopInfo +/// due to being unnatural. +/// +/// Note that LLVM has a generic pass that lowers irreducible control flow, but +/// it linearizes control flow, turning diamonds into two triangles, which is +/// both unnecessary and undesirable for WebAssembly. +/// +/// TODO: The transformation implemented here handles all irreducible control +/// flow, without exponential code-size expansion, though it does so by creating +/// inefficient code in many cases. Ideally, we should add other +/// transformations, including code-duplicating cases, which can be more +/// efficient in common cases, and they can fall back to this conservative +/// implementation as needed. 
+/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "llvm/ADT/PriorityQueue.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-fix-irreducible-control-flow" + +namespace { +class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass { + const char *getPassName() const override { + return "WebAssembly Fix Irreducible Control Flow"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + bool VisitLoop(MachineFunction &MF, MachineLoopInfo &MLI, MachineLoop *Loop); + +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {} +}; +} // end anonymous namespace + +char WebAssemblyFixIrreducibleControlFlow::ID = 0; +FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() { + return new WebAssemblyFixIrreducibleControlFlow(); +} + +namespace { + +/// A utility for walking the blocks of a loop, handling a nested inner +/// loop as a monolithic conceptual block. +class MetaBlock { + MachineBasicBlock *Block; + SmallVector<MachineBasicBlock *, 2> Preds; + SmallVector<MachineBasicBlock *, 2> Succs; + +public: + explicit MetaBlock(MachineBasicBlock *MBB) + : Block(MBB), Preds(MBB->pred_begin(), MBB->pred_end()), + Succs(MBB->succ_begin(), MBB->succ_end()) {} + + explicit MetaBlock(MachineLoop *Loop) : Block(Loop->getHeader()) { + Loop->getExitBlocks(Succs); + for (MachineBasicBlock *Pred : Block->predecessors()) + if (!Loop->contains(Pred)) + Preds.push_back(Pred); + } + + MachineBasicBlock *getBlock() const { return Block; } + + const SmallVectorImpl<MachineBasicBlock *> &predecessors() const { + return Preds; + } + const SmallVectorImpl<MachineBasicBlock *> &successors() const { + return Succs; + } + + bool operator==(const MetaBlock &MBB) { return Block == MBB.Block; } + bool operator!=(const MetaBlock &MBB) { return Block != MBB.Block; } +}; + +class SuccessorList final : public MetaBlock { + size_t Index; + size_t Num; + +public: + explicit SuccessorList(MachineBasicBlock *MBB) + : MetaBlock(MBB), Index(0), Num(successors().size()) {} + + explicit SuccessorList(MachineLoop *Loop) + : MetaBlock(Loop), Index(0), Num(successors().size()) {} + + bool HasNext() const { return Index != Num; } + + MachineBasicBlock *Next() { + assert(HasNext()); + return successors()[Index++]; + } +}; + +} // end anonymous namespace + +bool WebAssemblyFixIrreducibleControlFlow::VisitLoop(MachineFunction &MF, + MachineLoopInfo &MLI, + MachineLoop *Loop) { + MachineBasicBlock *Header = Loop ? 
Loop->getHeader() : &*MF.begin();
+  SetVector<MachineBasicBlock *> RewriteSuccs;
+
+  // DFS through Loop's body, looking for irreducible control flow. Loop is
+  // natural, and we stay in its body, and we treat any nested loops
+  // monolithically, so any cycles we encounter indicate irreducibility.
+  SmallPtrSet<MachineBasicBlock *, 8> OnStack;
+  SmallPtrSet<MachineBasicBlock *, 8> Visited;
+  SmallVector<SuccessorList, 4> LoopWorklist;
+  LoopWorklist.push_back(SuccessorList(Header));
+  OnStack.insert(Header);
+  Visited.insert(Header);
+  while (!LoopWorklist.empty()) {
+    SuccessorList &Top = LoopWorklist.back();
+    if (Top.HasNext()) {
+      MachineBasicBlock *Next = Top.Next();
+      if (Next == Header || (Loop && !Loop->contains(Next)))
+        continue;
+      if (LLVM_LIKELY(OnStack.insert(Next).second)) {
+        if (!Visited.insert(Next).second) {
+          OnStack.erase(Next);
+          continue;
+        }
+        MachineLoop *InnerLoop = MLI.getLoopFor(Next);
+        if (InnerLoop != Loop)
+          LoopWorklist.push_back(SuccessorList(InnerLoop));
+        else
+          LoopWorklist.push_back(SuccessorList(Next));
+      } else {
+        RewriteSuccs.insert(Top.getBlock());
+      }
+      continue;
+    }
+    OnStack.erase(Top.getBlock());
+    LoopWorklist.pop_back();
+  }
+
+  // Most likely, we didn't find any irreducible control flow.
+  if (LLVM_LIKELY(RewriteSuccs.empty()))
+    return false;
+
+  DEBUG(dbgs() << "Irreducible control flow detected!\n");
+
+  // Ok. We have irreducible control flow! Create a dispatch block which will
+  // contain a jump table to any block in the problematic set of blocks.
+  MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock();
+  MF.insert(MF.end(), Dispatch);
+  MLI.changeLoopFor(Dispatch, Loop);
+
+  // Add the jump table.
+  const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+  MachineInstrBuilder MIB = BuildMI(*Dispatch, Dispatch->end(), DebugLoc(),
+                                    TII.get(WebAssembly::BR_TABLE_I32));
+
+  // Add the register which will be used to tell the jump table which block to
+  // jump to.
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  unsigned Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
+  MIB.addReg(Reg);
+
+  // Collect all the blocks which need to have their successors rewritten,
+  // add the successors to the jump table, and remember their index.
+  DenseMap<MachineBasicBlock *, unsigned> Indices;
+  SmallVector<MachineBasicBlock *, 4> SuccWorklist(RewriteSuccs.begin(),
+                                                   RewriteSuccs.end());
+  while (!SuccWorklist.empty()) {
+    MachineBasicBlock *MBB = SuccWorklist.pop_back_val();
+    auto Pair = Indices.insert(std::make_pair(MBB, 0));
+    if (!Pair.second)
+      continue;
+
+    unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1;
+    DEBUG(dbgs() << "MBB#" << MBB->getNumber() << " has index " << Index
+                 << "\n");
+
+    Pair.first->second = Index;
+    for (auto Pred : MBB->predecessors())
+      RewriteSuccs.insert(Pred);
+
+    MIB.addMBB(MBB);
+    Dispatch->addSuccessor(MBB);
+
+    MetaBlock Meta(MBB);
+    for (auto *Succ : Meta.successors())
+      if (Succ != Header && (!Loop || Loop->contains(Succ)))
+        SuccWorklist.push_back(Succ);
+  }
+
+  // Rewrite the problematic successors for every block in RewriteSuccs.
+  // For simplicity, we just introduce a new block for every edge we need to
+  // rewrite. Fancier things are possible.
+  for (MachineBasicBlock *MBB : RewriteSuccs) {
+    DenseMap<MachineBasicBlock *, MachineBasicBlock *> Map;
+    for (auto *Succ : MBB->successors()) {
+      if (!Indices.count(Succ))
+        continue;
+
+      MachineBasicBlock *Split = MF.CreateMachineBasicBlock();
+      MF.insert(MBB->isLayoutSuccessor(Succ) ? 
MachineFunction::iterator(Succ) + : MF.end(), + Split); + MLI.changeLoopFor(Split, Loop); + + // Set the jump table's register of the index of the block we wish to + // jump to, and jump to the jump table. + BuildMI(*Split, Split->end(), DebugLoc(), TII.get(WebAssembly::CONST_I32), + Reg) + .addImm(Indices[Succ]); + BuildMI(*Split, Split->end(), DebugLoc(), TII.get(WebAssembly::BR)) + .addMBB(Dispatch); + Split->addSuccessor(Dispatch); + Map[Succ] = Split; + } + // Remap the terminator operands and the successor list. + for (MachineInstr &Term : MBB->terminators()) + for (auto &Op : Term.explicit_uses()) + if (Op.isMBB() && Indices.count(Op.getMBB())) + Op.setMBB(Map[Op.getMBB()]); + for (auto Rewrite : Map) + MBB->replaceSuccessor(Rewrite.first, Rewrite.second); + } + + // Create a fake default label, because br_table requires one. + MIB.addMBB(MIB.getInstr() + ->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1) + .getMBB()); + + return true; +} + +bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction( + MachineFunction &MF) { + DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n" + "********** Function: " + << MF.getName() << '\n'); + + bool Changed = false; + auto &MLI = getAnalysis<MachineLoopInfo>(); + + // Visit the function body, which is identified as a null loop. + Changed |= VisitLoop(MF, MLI, nullptr); + + // Visit all the loops. + SmallVector<MachineLoop *, 8> Worklist(MLI.begin(), MLI.end()); + while (!Worklist.empty()) { + MachineLoop *CurLoop = Worklist.pop_back_val(); + Worklist.append(CurLoop->begin(), CurLoop->end()); + Changed |= VisitLoop(MF, MLI, CurLoop); + } + + // If we made any changes, completely recompute everything. + if (LLVM_UNLIKELY(Changed)) { + DEBUG(dbgs() << "Recomputing dominators and loops.\n"); + MF.getRegInfo().invalidateLiveness(); + MF.RenumberBlocks(); + getAnalysis<MachineDominatorTree>().runOnMachineFunction(MF); + MLI.runOnMachineFunction(MF); + } + + return Changed; +} diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp index 0eefd57f1f2..0a5782e5c28 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp @@ -34,10 +34,7 @@ using namespace llvm; #define DEBUG_TYPE "wasm-frame-info" -// TODO: Implement a red zone? // TODO: wasm64 -// TODO: Prolog/epilog should be stackified too. This pass runs after register -// stackification, so we'll have to do it manually. // TODO: Emit TargetOpcode::CFI_INSTRUCTION instructions /// Return true if the specified function should have a dedicated frame pointer @@ -46,7 +43,7 @@ bool WebAssemblyFrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const auto *RegInfo = MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo(); - return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() || + return MFI->isFrameAddressTaken() || MFI->hasVarSizedObjects() || MFI->hasStackMap() || MFI->hasPatchPoint() || RegInfo->needsStackRealignment(MF); } @@ -62,63 +59,64 @@ bool WebAssemblyFrameLowering::hasReservedCallFrame( } -/// Adjust the stack pointer by a constant amount. 
-static void adjustStackPointer(unsigned StackSize, - bool AdjustUp, - MachineFunction& MF, - MachineBasicBlock& MBB, - const TargetInstrInfo* TII, - MachineBasicBlock::iterator InsertPt, - const DebugLoc& DL) { - auto &MRI = MF.getRegInfo(); - unsigned SPReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); - auto *SPSymbol = MF.createExternalSymbolName("__stack_pointer"); - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), SPReg) - .addExternalSymbol(SPSymbol); - // This MachinePointerInfo should reference __stack_pointer as well but - // doesn't because MachinePointerInfo() takes a GV which we don't have for - // __stack_pointer. TODO: check if PseudoSourceValue::ExternalSymbolCallEntry - // is appropriate instead. (likewise for EmitEpologue below) - auto *LoadMMO = new MachineMemOperand(MachinePointerInfo(), - MachineMemOperand::MOLoad, 4, 4); - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::LOAD_I32), SPReg) - .addImm(0) - .addReg(SPReg) - .addMemOperand(LoadMMO); - // Add/Subtract the frame size - unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) - .addImm(StackSize); - BuildMI(MBB, InsertPt, DL, - TII->get(AdjustUp ? WebAssembly::ADD_I32 : WebAssembly::SUB_I32), - WebAssembly::SP32) - .addReg(SPReg) - .addReg(OffsetReg); - // The SP32 register now has the new stacktop. Also write it back to memory. - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) - .addExternalSymbol(SPSymbol); - auto *MMO = new MachineMemOperand(MachinePointerInfo(), +/// Returns true if this function needs a local user-space stack pointer. +/// Unlike a machine stack pointer, the wasm user stack pointer is a global +/// variable, so it is loaded into a register in the prolog. +bool WebAssemblyFrameLowering::needsSP(const MachineFunction &MF, + const MachineFrameInfo &MFI) const { + return MFI.getStackSize() || MFI.adjustsStack() || hasFP(MF); +} + +/// Returns true if the local user-space stack pointer needs to be written back +/// to memory by this function (this is not meaningful if needsSP is false). If +/// false, the stack red zone can be used and only a local SP is needed. 
+bool WebAssemblyFrameLowering::needsSPWriteback( + const MachineFunction &MF, const MachineFrameInfo &MFI) const { + assert(needsSP(MF, MFI)); + return MFI.getStackSize() > RedZoneSize || MFI.hasCalls() || + MF.getFunction()->hasFnAttribute(Attribute::NoRedZone); +} + +static void writeSPToMemory(unsigned SrcReg, MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator &InsertAddr, + MachineBasicBlock::iterator &InsertStore, + const DebugLoc &DL) { + const char *ES = "__stack_pointer"; + auto *SPSymbol = MF.createExternalSymbolName(ES); + MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterClass *PtrRC = + MRI.getTargetRegisterInfo()->getPointerRegClass(MF); + unsigned Zero = MRI.createVirtualRegister(PtrRC); + unsigned Drop = MRI.createVirtualRegister(PtrRC); + const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + + BuildMI(MBB, InsertAddr, DL, TII->get(WebAssembly::CONST_I32), Zero) + .addImm(0); + auto *MMO = new MachineMemOperand(MachinePointerInfo(MF.getPSVManager() + .getExternalSymbolCallEntry(ES)), MachineMemOperand::MOStore, 4, 4); - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::STORE_I32), WebAssembly::SP32) - .addImm(0) - .addReg(OffsetReg) - .addReg(WebAssembly::SP32) + BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::STORE_I32), Drop) + .addExternalSymbol(SPSymbol) + .addReg(Zero) + .addImm(2) // p2align + .addReg(SrcReg) .addMemOperand(MMO); } -void WebAssemblyFrameLowering::eliminateCallFramePseudoInstr( +MachineBasicBlock::iterator +WebAssemblyFrameLowering::eliminateCallFramePseudoInstr( MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - const auto *TII = - static_cast<const WebAssemblyInstrInfo*>(MF.getSubtarget().getInstrInfo()); - DebugLoc DL = I->getDebugLoc(); - unsigned Opc = I->getOpcode(); - bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode(); - unsigned Amount = I->getOperand(0).getImm(); - if (Amount) - adjustStackPointer(Amount, IsDestroy, MF, MBB, - TII, I, DL); - MBB.erase(I); + assert(!I->getOperand(0).getImm() && hasFP(MF) && + "Call frame pseudos should only be used for dynamic stack adjustment"); + const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + if (I->getOpcode() == TII->getCallFrameDestroyOpcode() && + needsSPWriteback(MF, *MF.getFrameInfo())) { + DebugLoc DL = I->getDebugLoc(); + writeSPToMemory(WebAssembly::SP32, MF, MBB, I, I, DL); + } + return MBB.erase(I); } void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, @@ -127,49 +125,91 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF, auto *MFI = MF.getFrameInfo(); assert(MFI->getCalleeSavedInfo().empty() && "WebAssembly should not have callee-saved registers"); - assert(!hasFP(MF) && "Functions needing frame pointers not yet supported"); + + if (!needsSP(MF, *MFI)) return; uint64_t StackSize = MFI->getStackSize(); - if (!StackSize && (!MFI->adjustsStack() || MFI->getMaxCallFrameSize() == 0)) - return; - const auto *TII = MF.getSubtarget().getInstrInfo(); + const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + auto &MRI = MF.getRegInfo(); auto InsertPt = MBB.begin(); DebugLoc DL; - adjustStackPointer(StackSize, false, MF, MBB, TII, InsertPt, DL); + const TargetRegisterClass *PtrRC = + MRI.getTargetRegisterInfo()->getPointerRegClass(MF); + unsigned Zero = MRI.createVirtualRegister(PtrRC); + unsigned SPReg = MRI.createVirtualRegister(PtrRC); + const char *ES = "__stack_pointer"; + auto *SPSymbol = 
MF.createExternalSymbolName(ES); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), Zero) + .addImm(0); + auto *LoadMMO = new MachineMemOperand(MachinePointerInfo(MF.getPSVManager() + .getExternalSymbolCallEntry(ES)), + MachineMemOperand::MOLoad, 4, 4); + // Load the SP value. + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::LOAD_I32), + StackSize ? SPReg : (unsigned)WebAssembly::SP32) + .addExternalSymbol(SPSymbol) + .addReg(Zero) // addr + .addImm(2) // p2align + .addMemOperand(LoadMMO); + + if (StackSize) { + // Subtract the frame size + unsigned OffsetReg = MRI.createVirtualRegister(PtrRC); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) + .addImm(StackSize); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::SUB_I32), + WebAssembly::SP32) + .addReg(SPReg) + .addReg(OffsetReg); + } + if (hasFP(MF)) { + // Unlike most conventional targets (where FP points to the saved FP), + // FP points to the bottom of the fixed-size locals, so we can use positive + // offsets in load/store instructions. + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), + WebAssembly::FP32) + .addReg(WebAssembly::SP32); + } + if (StackSize && needsSPWriteback(MF, *MFI)) { + writeSPToMemory(WebAssembly::SP32, MF, MBB, InsertPt, InsertPt, DL); + } } void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { - uint64_t StackSize = MF.getFrameInfo()->getStackSize(); - if (!StackSize) - return; - const auto *TII = MF.getSubtarget().getInstrInfo(); + auto *MFI = MF.getFrameInfo(); + uint64_t StackSize = MFI->getStackSize(); + if (!needsSP(MF, *MFI) || !needsSPWriteback(MF, *MFI)) return; + const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); auto &MRI = MF.getRegInfo(); - unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); auto InsertPt = MBB.getFirstTerminator(); DebugLoc DL; - if (InsertPt != MBB.end()) { + if (InsertPt != MBB.end()) DL = InsertPt->getDebugLoc(); + + // Restore the stack pointer. If we had fixed-size locals, add the offset + // subtracted in the prolog. + unsigned SPReg = 0; + MachineBasicBlock::iterator InsertAddr = InsertPt; + if (StackSize) { + const TargetRegisterClass *PtrRC = + MRI.getTargetRegisterInfo()->getPointerRegClass(MF); + unsigned OffsetReg = MRI.createVirtualRegister(PtrRC); + InsertAddr = + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) + .addImm(StackSize); + // In the epilog we don't need to write the result back to the SP32 physreg + // because it won't be used again. We can use a stackified register instead. + SPReg = MRI.createVirtualRegister(PtrRC); + BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::ADD_I32), SPReg) + .addReg(hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32) + .addReg(OffsetReg); + } else { + SPReg = hasFP(MF) ? WebAssembly::FP32 : WebAssembly::SP32; } - // Restore the stack pointer. 
Without FP its value is just SP32 - stacksize - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) - .addImm(StackSize); - auto *SPSymbol = MF.createExternalSymbolName("__stack_pointer"); - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::ADD_I32), WebAssembly::SP32) - .addReg(WebAssembly::SP32) - .addReg(OffsetReg); - // Re-use OffsetReg to hold the address of the stacktop - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), OffsetReg) - .addExternalSymbol(SPSymbol); - auto *MMO = new MachineMemOperand(MachinePointerInfo(), - MachineMemOperand::MOStore, 4, 4); - BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::STORE_I32), WebAssembly::SP32) - .addImm(0) - .addReg(OffsetReg) - .addReg(WebAssembly::SP32) - .addMemOperand(MMO); + writeSPToMemory(SPReg, MF, MBB, InsertAddr, InsertPt, DL); } diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h index 5f4708fe77e..e20fc5df744 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h @@ -19,18 +19,24 @@ #include "llvm/Target/TargetFrameLowering.h" namespace llvm { +class MachineFrameInfo; class WebAssemblyFrameLowering final : public TargetFrameLowering { -public: + public: + /// Size of the red zone for the user stack (leaf functions can use this much + /// space below the stack pointer without writing it back to memory). + // TODO: (ABI) Revisit and decide how large it should be. + static const size_t RedZoneSize = 128; + WebAssemblyFrameLowering() : TargetFrameLowering(StackGrowsDown, /*StackAlignment=*/16, /*LocalAreaOffset=*/0, /*TransientStackAlignment=*/16, /*StackRealignable=*/true) {} - void - eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const override; + MachineBasicBlock::iterator eliminateCallFramePseudoInstr( + MachineFunction &MF, MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const override; /// These methods insert prolog and epilog code into the function. void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override; @@ -38,8 +44,13 @@ public: bool hasFP(const MachineFunction &MF) const override; bool hasReservedCallFrame(const MachineFunction &MF) const override; + + private: + bool needsSP(const MachineFunction &MF, const MachineFrameInfo &MFI) const; + bool needsSPWriteback(const MachineFunction &MF, + const MachineFrameInfo &MFI) const; }; -} // end namespace llvm +} // end namespace llvm #endif diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyISD.def index 3a03fa55b22..2f0f106ef5b 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyISD.def +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyISD.def @@ -20,6 +20,6 @@ HANDLE_NODETYPE(RETURN) HANDLE_NODETYPE(ARGUMENT) HANDLE_NODETYPE(Wrapper) HANDLE_NODETYPE(BR_IF) -HANDLE_NODETYPE(TABLESWITCH) +HANDLE_NODETYPE(BR_TABLE) // add memory opcodes starting at ISD::FIRST_TARGET_MEMORY_OPCODE here... 
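The BR_TABLE node introduced above (renamed from TABLESWITCH) is the jump-table branch that the new WebAssemblyFixIrreducibleControlFlow pass emits. As a rough source-level picture of what that pass does (an illustrative sketch only; the function and label names here are invented, and the real pass rewrites MachineBasicBlocks rather than C++ source):

    // Irreducible: the cycle between A and B has two entries (falling in from
    // the top and jumping in via "goto B"), so it is not a natural loop and
    // MachineLoopInfo cannot describe it.
    int irreducible(int x) {
      if (x) goto B;
    A:
      x += 1;
    B:
      x *= 2;
      if (x < 100) goto A;
      return x;
    }

    // Reducible rewrite: a single-entry loop around a dispatch switch. This is
    // essentially what the pass builds: each rewritten predecessor sets an
    // index register (CONST_I32) and branches to a dispatch block whose
    // BR_TABLE_I32 jumps to the block selected by that index.
    int reducible(int x) {
      int index = x ? 1 : 0;    // index written by each rewritten predecessor
      for (;;) {
        switch (index) {        // the dispatch block's jump table
        case 0:                 // block A
          x += 1;
          index = 1;
          break;
        case 1:                 // block B
          x *= 2;
          if (x < 100) { index = 0; break; }
          return x;
        }
      }
    }
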
diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp index 8390f797c43..88c38b3602b 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -54,7 +54,7 @@ public: return SelectionDAGISel::runOnMachineFunction(MF); } - SDNode *Select(SDNode *Node) override; + void Select(SDNode *Node) override; bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) override; @@ -67,7 +67,7 @@ private: }; } // end anonymous namespace -SDNode *WebAssemblyDAGToDAGISel::Select(SDNode *Node) { +void WebAssemblyDAGToDAGISel::Select(SDNode *Node) { // Dump information about the Node being selected. DEBUG(errs() << "Selecting: "); DEBUG(Node->dump(CurDAG)); @@ -77,11 +77,10 @@ SDNode *WebAssemblyDAGToDAGISel::Select(SDNode *Node) { if (Node->isMachineOpcode()) { DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); Node->setNodeId(-1); - return nullptr; + return; } // Few custom selection stuff. - SDNode *ResNode = nullptr; EVT VT = Node->getValueType(0); switch (Node->getOpcode()) { @@ -92,16 +91,7 @@ SDNode *WebAssemblyDAGToDAGISel::Select(SDNode *Node) { } // Select the default instruction. - ResNode = SelectCode(Node); - - DEBUG(errs() << "=> "); - if (ResNode == nullptr || ResNode == Node) - DEBUG(Node->dump(CurDAG)); - else - DEBUG(ResNode->dump(CurDAG)); - DEBUG(errs() << "\n"); - - return ResNode; + SelectCode(Node); } bool WebAssemblyDAGToDAGISel::SelectInlineAsmMemoryOperand( diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index e9933b09298..9e7731997d5 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -26,7 +26,6 @@ #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/Function.h" #include "llvm/IR/Intrinsics.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -35,61 +34,6 @@ using namespace llvm; #define DEBUG_TYPE "wasm-lower" -namespace { -// Diagnostic information for unimplemented or unsupported feature reporting. -// TODO: This code is copied from BPF and AMDGPU; consider factoring it out -// and sharing code. -class DiagnosticInfoUnsupported final : public DiagnosticInfo { -private: - // Debug location where this diagnostic is triggered. 
- DebugLoc DLoc; - const Twine &Description; - const Function &Fn; - SDValue Value; - - static int KindID; - - static int getKindID() { - if (KindID == 0) - KindID = llvm::getNextAvailablePluginDiagnosticKind(); - return KindID; - } - -public: - DiagnosticInfoUnsupported(SDLoc DLoc, const Function &Fn, const Twine &Desc, - SDValue Value) - : DiagnosticInfo(getKindID(), DS_Error), DLoc(DLoc.getDebugLoc()), - Description(Desc), Fn(Fn), Value(Value) {} - - void print(DiagnosticPrinter &DP) const override { - std::string Str; - raw_string_ostream OS(Str); - - if (DLoc) { - auto DIL = DLoc.get(); - StringRef Filename = DIL->getFilename(); - unsigned Line = DIL->getLine(); - unsigned Column = DIL->getColumn(); - OS << Filename << ':' << Line << ':' << Column << ' '; - } - - OS << "in function " << Fn.getName() << ' ' << *Fn.getFunctionType() << '\n' - << Description; - if (Value) - Value->print(OS); - OS << '\n'; - OS.flush(); - DP << Str; - } - - static bool classof(const DiagnosticInfo *DI) { - return DI->getKind() == getKindID(); - } -}; - -int DiagnosticInfoUnsupported::KindID = 0; -} // end anonymous namespace - WebAssemblyTargetLowering::WebAssemblyTargetLowering( const TargetMachine &TM, const WebAssemblySubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { @@ -116,6 +60,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( setOperationAction(ISD::GlobalAddress, MVTPtr, Custom); setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom); setOperationAction(ISD::JumpTable, MVTPtr, Custom); + setOperationAction(ISD::BlockAddress, MVTPtr, Custom); + setOperationAction(ISD::BRIND, MVT::Other, Custom); // Take the default expansion for va_arg, va_copy, and va_end. There is no // default action for va_start, so we do that custom. @@ -148,7 +94,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( for (auto T : {MVT::i32, MVT::i64}) { // Expand unavailable integer operations. for (auto Op : - {ISD::BSWAP, ISD::ROTL, ISD::ROTR, ISD::SMUL_LOHI, ISD::UMUL_LOHI, + {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU, ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) { @@ -167,6 +113,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand); setOperationAction(ISD::FrameIndex, MVT::i32, Custom); + setOperationAction(ISD::CopyToReg, MVT::Other, Custom); // Expand these forms; we pattern-match the forms that we can handle in isel. for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) @@ -204,13 +151,14 @@ bool WebAssemblyTargetLowering::isOffsetFoldingLegal( MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/, EVT VT) const { unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1); - if (BitWidth > 1 && BitWidth < 8) - BitWidth = 8; + if (BitWidth > 1 && BitWidth < 8) BitWidth = 8; if (BitWidth > 64) { - BitWidth = 64; + // The shift will be lowered to a libcall, and compiler-rt libcalls expect + // the count to be an i32. 
+ BitWidth = 32; assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) && - "64-bit shift counts ought to be enough for anyone"); + "32-bit shift counts ought to be enough for anyone"); } MVT Result = MVT::getIntegerVT(BitWidth); @@ -219,13 +167,13 @@ MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/, return Result; } -const char * -WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const { +const char *WebAssemblyTargetLowering::getTargetNodeName( + unsigned Opcode) const { switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) { - case WebAssemblyISD::FIRST_NUMBER: - break; -#define HANDLE_NODETYPE(NODE) \ - case WebAssemblyISD::NODE: \ + case WebAssemblyISD::FIRST_NUMBER: + break; +#define HANDLE_NODETYPE(NODE) \ + case WebAssemblyISD::NODE: \ return "WebAssemblyISD::" #NODE; #include "WebAssemblyISD.def" #undef HANDLE_NODETYPE @@ -240,17 +188,17 @@ WebAssemblyTargetLowering::getRegForInlineAsmConstraint( // WebAssembly register class. if (Constraint.size() == 1) { switch (Constraint[0]) { - case 'r': - assert(VT != MVT::iPTR && "Pointer MVT not expected here"); - if (VT.isInteger() && !VT.isVector()) { - if (VT.getSizeInBits() <= 32) - return std::make_pair(0U, &WebAssembly::I32RegClass); - if (VT.getSizeInBits() <= 64) - return std::make_pair(0U, &WebAssembly::I64RegClass); - } - break; - default: - break; + case 'r': + assert(VT != MVT::iPTR && "Pointer MVT not expected here"); + if (VT.isInteger() && !VT.isVector()) { + if (VT.getSizeInBits() <= 32) + return std::make_pair(0U, &WebAssembly::I32RegClass); + if (VT.getSizeInBits() <= 64) + return std::make_pair(0U, &WebAssembly::I64RegClass); + } + break; + default: + break; } } @@ -274,17 +222,33 @@ bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL, // WebAssembly offsets are added as unsigned without wrapping. The // isLegalAddressingMode gives us no way to determine if wrapping could be // happening, so we approximate this by accepting only non-negative offsets. - if (AM.BaseOffs < 0) - return false; + if (AM.BaseOffs < 0) return false; // WebAssembly has no scale register operands. - if (AM.Scale != 0) - return false; + if (AM.Scale != 0) return false; // Everything else is legal. return true; } +bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses( + EVT /*VT*/, unsigned /*AddrSpace*/, unsigned /*Align*/, bool *Fast) const { + // WebAssembly supports unaligned accesses, though it should be declared + // with the p2align attribute on loads and stores which do so, and there + // may be a performance impact. We tell LLVM they're "fast" because + // for the kinds of things that LLVM uses this for (merging adjacent stores + // of constants, etc.), WebAssembly implementations will either want the + // unaligned access or they'll split anyway. + if (Fast) *Fast = true; + return true; +} + +bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const { + // The current thinking is that wasm engines will perform this optimization, + // so we can save on code size. + return true; +} + //===----------------------------------------------------------------------===// // WebAssembly Lowering private implementation. 
//===----------------------------------------------------------------------===// @@ -293,10 +257,10 @@ bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL, // Lowering Code //===----------------------------------------------------------------------===// -static void fail(SDLoc DL, SelectionDAG &DAG, const char *msg) { +static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *msg) { MachineFunction &MF = DAG.getMachineFunction(); DAG.getContext()->diagnose( - DiagnosticInfoUnsupported(DL, *MF.getFunction(), msg, SDValue())); + DiagnosticInfoUnsupported(*MF.getFunction(), msg, DL.getDebugLoc())); } // Test whether the given calling convention is supported. @@ -312,14 +276,14 @@ static bool CallingConvSupported(CallingConv::ID CallConv) { CallConv == CallingConv::CXX_FAST_TLS; } -SDValue -WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, - SmallVectorImpl<SDValue> &InVals) const { +SDValue WebAssemblyTargetLowering::LowerCall( + CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const { SelectionDAG &DAG = CLI.DAG; SDLoc DL = CLI.DL; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; MachineFunction &MF = DAG.getMachineFunction(); + auto Layout = MF.getDataLayout(); CallingConv::ID CallConv = CLI.CallConv; if (!CallingConvSupported(CallConv)) @@ -337,16 +301,15 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, fail(DL, DAG, "WebAssembly doesn't support tail call yet"); CLI.IsTailCall = false; - SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; - SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; if (Ins.size() > 1) fail(DL, DAG, "WebAssembly doesn't support more than 1 returned value yet"); SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; - for (const ISD::OutputArg &Out : Outs) { - if (Out.Flags.isByVal()) - fail(DL, DAG, "WebAssembly hasn't implemented byval arguments"); + SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; + for (unsigned i = 0; i < Outs.size(); ++i) { + const ISD::OutputArg &Out = Outs[i]; + SDValue &OutVal = OutVals[i]; if (Out.Flags.isNest()) fail(DL, DAG, "WebAssembly hasn't implemented nest arguments"); if (Out.Flags.isInAlloca()) @@ -355,28 +318,41 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments"); if (Out.Flags.isInConsecutiveRegsLast()) fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments"); + if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) { + auto *MFI = MF.getFrameInfo(); + int FI = MFI->CreateStackObject(Out.Flags.getByValSize(), + Out.Flags.getByValAlign(), + /*isSS=*/false); + SDValue SizeNode = + DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32); + SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout)); + Chain = DAG.getMemcpy( + Chain, DL, FINode, OutVal, SizeNode, Out.Flags.getByValAlign(), + /*isVolatile*/ false, /*AlwaysInline=*/false, + /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo()); + OutVal = FINode; + } } bool IsVarArg = CLI.IsVarArg; unsigned NumFixedArgs = CLI.NumFixedArgs; - auto PtrVT = getPointerTy(MF.getDataLayout()); + + auto PtrVT = getPointerTy(Layout); // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); if (IsVarArg) { - // Outgoing non-fixed arguments are placed at the top of the stack. First - // compute their offsets and the total amount of argument stack space - // needed. 
+ // Outgoing non-fixed arguments are placed in a buffer. First + // compute their offsets and the total amount of buffer space needed. for (SDValue Arg : make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) { EVT VT = Arg.getValueType(); assert(VT != MVT::iPTR && "Legalized args should be concrete"); Type *Ty = VT.getTypeForEVT(*DAG.getContext()); - unsigned Offset = - CCInfo.AllocateStack(MF.getDataLayout().getTypeAllocSize(Ty), - MF.getDataLayout().getABITypeAlignment(Ty)); + unsigned Offset = CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), + Layout.getABITypeAlignment(Ty)); CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(), Offset, VT.getSimpleVT(), CCValAssign::Full)); @@ -385,17 +361,13 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, unsigned NumBytes = CCInfo.getAlignedCallFrameSize(); - SDValue NB; - if (NumBytes) { - NB = DAG.getConstant(NumBytes, DL, PtrVT, true); - Chain = DAG.getCALLSEQ_START(Chain, NB, DL); - } - - if (IsVarArg) { + SDValue FINode; + if (IsVarArg && NumBytes) { // For non-fixed arguments, next emit stores to store the argument values - // to the stack at the offsets computed above. - SDValue SP = DAG.getCopyFromReg( - Chain, DL, getStackPointerRegisterToSaveRestore(), PtrVT); + // to the stack buffer at the offsets computed above. + int FI = MF.getFrameInfo()->CreateStackObject(NumBytes, + Layout.getStackAlignment(), + /*isSS=*/false); unsigned ValNo = 0; SmallVector<SDValue, 8> Chains; for (SDValue Arg : @@ -403,14 +375,17 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, assert(ArgLocs[ValNo].getValNo() == ValNo && "ArgLocs should remain in order and only hold varargs args"); unsigned Offset = ArgLocs[ValNo++].getLocMemOffset(); - SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, SP, + FINode = DAG.getFrameIndex(FI, getPointerTy(Layout)); + SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode, DAG.getConstant(Offset, DL, PtrVT)); - Chains.push_back(DAG.getStore(Chain, DL, Arg, Add, - MachinePointerInfo::getStack(MF, Offset), - false, false, 0)); + Chains.push_back(DAG.getStore( + Chain, DL, Arg, Add, + MachinePointerInfo::getFixedStack(MF, FI, Offset), 0)); } if (!Chains.empty()) Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + } else if (IsVarArg) { + FINode = DAG.getIntPtrConstant(0, DL); } // Compute the operands for the CALLn node. @@ -422,8 +397,10 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, // isn't reliable. Ops.append(OutVals.begin(), IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end()); + // Add a pointer to the vararg buffer. + if (IsVarArg) Ops.push_back(FINode); - SmallVector<EVT, 8> Tys; + SmallVector<EVT, 8> InTys; for (const auto &In : Ins) { assert(!In.Flags.isByVal() && "byval is not valid for return values"); assert(!In.Flags.isNest() && "nest is not valid for return values"); @@ -436,13 +413,13 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, "WebAssembly hasn't implemented cons regs last return values"); // Ignore In.getOrigAlign() because all our arguments are passed in // registers. - Tys.push_back(In.VT); + InTys.push_back(In.VT); } - Tys.push_back(MVT::Other); - SDVTList TyList = DAG.getVTList(Tys); + InTys.push_back(MVT::Other); + SDVTList InTyList = DAG.getVTList(InTys); SDValue Res = DAG.getNode(Ins.empty() ? 
WebAssemblyISD::CALL0 : WebAssemblyISD::CALL1, - DL, TyList, Ops); + DL, InTyList, Ops); if (Ins.empty()) { Chain = Res; } else { @@ -450,11 +427,6 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, Chain = Res.getValue(1); } - if (NumBytes) { - SDValue Unused = DAG.getTargetConstant(0, DL, PtrVT); - Chain = DAG.getCALLSEQ_END(Chain, NB, Unused, SDValue(), DL); - } - return Chain; } @@ -469,7 +441,7 @@ bool WebAssemblyTargetLowering::CanLowerReturn( SDValue WebAssemblyTargetLowering::LowerReturn( SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/, const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, SDLoc DL, + const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, SelectionDAG &DAG) const { assert(Outs.size() <= 1 && "WebAssembly can only return up to one value"); if (!CallingConvSupported(CallConv)) @@ -496,10 +468,11 @@ SDValue WebAssemblyTargetLowering::LowerReturn( } SDValue WebAssemblyTargetLowering::LowerFormalArguments( - SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/, - const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc DL, SelectionDAG &DAG, - SmallVectorImpl<SDValue> &InVals) const { + SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, + SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); + auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>(); if (!CallingConvSupported(CallConv)) fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions"); @@ -509,8 +482,6 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments( MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS); for (const ISD::InputArg &In : Ins) { - if (In.Flags.isByVal()) - fail(DL, DAG, "WebAssembly hasn't implemented byval arguments"); if (In.Flags.isInAlloca()) fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments"); if (In.Flags.isNest()) @@ -528,11 +499,22 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments( : DAG.getUNDEF(In.VT)); // Record the number and types of arguments. - MF.getInfo<WebAssemblyFunctionInfo>()->addParam(In.VT); + MFI->addParam(In.VT); } - // Incoming varargs arguments are on the stack and will be accessed through - // va_arg, so we don't need to do anything for them here. + // Varargs are copied into a buffer allocated by the caller, and a pointer to + // the buffer is passed as an argument. 
+ if (IsVarArg) { + MVT PtrVT = getPointerTy(MF.getDataLayout()); + unsigned VarargVreg = + MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrVT)); + MFI->setVarargBufferVreg(VarargVreg); + Chain = DAG.getCopyToReg( + Chain, DL, VarargVreg, + DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT, + DAG.getTargetConstant(Ins.size(), DL, MVT::i32))); + MFI->addParam(PtrVT); + } return Chain; } @@ -543,31 +525,85 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments( SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); switch (Op.getOpcode()) { - default: - llvm_unreachable("unimplemented operation lowering"); - return SDValue(); - case ISD::FrameIndex: - return LowerFrameIndex(Op, DAG); - case ISD::GlobalAddress: - return LowerGlobalAddress(Op, DAG); - case ISD::ExternalSymbol: - return LowerExternalSymbol(Op, DAG); - case ISD::JumpTable: - return LowerJumpTable(Op, DAG); - case ISD::BR_JT: - return LowerBR_JT(Op, DAG); - case ISD::VASTART: - return LowerVASTART(Op, DAG); + default: + llvm_unreachable("unimplemented operation lowering"); + return SDValue(); + case ISD::FrameIndex: + return LowerFrameIndex(Op, DAG); + case ISD::GlobalAddress: + return LowerGlobalAddress(Op, DAG); + case ISD::ExternalSymbol: + return LowerExternalSymbol(Op, DAG); + case ISD::JumpTable: + return LowerJumpTable(Op, DAG); + case ISD::BR_JT: + return LowerBR_JT(Op, DAG); + case ISD::VASTART: + return LowerVASTART(Op, DAG); + case ISD::BlockAddress: + case ISD::BRIND: + fail(DL, DAG, "WebAssembly hasn't implemented computed gotos"); + return SDValue(); + case ISD::RETURNADDR: // Probably nothing meaningful can be returned here. + fail(DL, DAG, "WebAssembly hasn't implemented __builtin_return_address"); + return SDValue(); + case ISD::FRAMEADDR: + return LowerFRAMEADDR(Op, DAG); + case ISD::CopyToReg: + return LowerCopyToReg(Op, DAG); } } +SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op, + SelectionDAG &DAG) const { + SDValue Src = Op.getOperand(2); + if (isa<FrameIndexSDNode>(Src.getNode())) { + // CopyToReg nodes don't support FrameIndex operands. Other targets select + // the FI to some LEA-like instruction, but since we don't have that, we + // need to insert some kind of instruction that can take an FI operand and + // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy + // copy_local between Op and its FI operand. + SDValue Chain = Op.getOperand(0); + SDLoc DL(Op); + unsigned Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg(); + EVT VT = Src.getValueType(); + SDValue Copy( + DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_LOCAL_I32 + : WebAssembly::COPY_LOCAL_I64, + DL, VT, Src), + 0); + return Op.getNode()->getNumValues() == 1 + ? DAG.getCopyToReg(Chain, DL, Reg, Copy) + : DAG.getCopyToReg(Chain, DL, Reg, Copy, Op.getNumOperands() == 4 + ? Op.getOperand(3) + : SDValue()); + } + return SDValue(); +} + SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const { int FI = cast<FrameIndexSDNode>(Op)->getIndex(); return DAG.getTargetFrameIndex(FI, Op.getValueType()); } +SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op, + SelectionDAG &DAG) const { + // Non-zero depths are not supported by WebAssembly currently. Use the + // legalizer's default expansion, which is to return 0 (what this function is + // documented to do). 
+ if (Op.getConstantOperandVal(0) > 0) + return SDValue(); + + DAG.getMachineFunction().getFrameInfo()->setFrameAddressIsTaken(true); + EVT VT = Op.getValueType(); + unsigned FP = + Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction()); + return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT); +} + SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); @@ -582,9 +618,8 @@ SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op, DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset())); } -SDValue -WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op, - SelectionDAG &DAG) const { +SDValue WebAssemblyTargetLowering::LowerExternalSymbol( + SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); const auto *ES = cast<ExternalSymbolSDNode>(Op); EVT VT = Op.getValueType(); @@ -603,7 +638,7 @@ WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op, SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { // There's no need for a Wrapper node because we always incorporate a jump - // table operand into a TABLESWITCH instruction, rather than ever + // table operand into a BR_TABLE instruction, rather than ever // materializing it in a register. const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(), @@ -625,16 +660,15 @@ SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op, MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo(); const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs; + // Add an operand for each case. + for (auto MBB : MBBs) Ops.push_back(DAG.getBasicBlock(MBB)); + // TODO: For now, we just pick something arbitrary for a default case for now. // We really want to sniff out the guard and put in the real default case (and // delete the guard). Ops.push_back(DAG.getBasicBlock(MBBs[0])); - // Add an operand for each case. - for (auto MBB : MBBs) - Ops.push_back(DAG.getBasicBlock(MBB)); - - return DAG.getNode(WebAssemblyISD::TABLESWITCH, DL, MVT::Other, Ops); + return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops); } SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op, @@ -642,16 +676,13 @@ SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op, SDLoc DL(Op); EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout()); - // The incoming non-fixed arguments are placed on the top of the stack, with - // natural alignment, at the point of the call, so the base pointer is just - // the current frame pointer. 
- DAG.getMachineFunction().getFrameInfo()->setFrameAddressIsTaken(true); - unsigned FP = - Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction()); - SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FP, PtrVT); + auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>(); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); - return DAG.getStore(Op.getOperand(0), DL, FrameAddr, Op.getOperand(1), - MachinePointerInfo(SV), false, false, 0); + + SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL, + MFI->getVarargBufferVreg(), PtrVT); + return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1), + MachinePointerInfo(SV), 0); } //===----------------------------------------------------------------------===// diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h index e7232a042e1..5bc723028e6 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -29,17 +29,17 @@ enum NodeType : unsigned { #undef HANDLE_NODETYPE }; -} // end namespace WebAssemblyISD +} // end namespace WebAssemblyISD class WebAssemblySubtarget; class WebAssemblyTargetMachine; class WebAssemblyTargetLowering final : public TargetLowering { -public: + public: WebAssemblyTargetLowering(const TargetMachine &TM, const WebAssemblySubtarget &STI); -private: + private: /// Keep a pointer to the WebAssemblySubtarget around so that we can make the /// right decision when generating code for different targets. const WebAssemblySubtarget *Subtarget; @@ -49,13 +49,16 @@ private: bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override; const char *getTargetNodeName(unsigned Opcode) const override; - std::pair<unsigned, const TargetRegisterClass *> - getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, - StringRef Constraint, MVT VT) const override; + std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint( + const TargetRegisterInfo *TRI, StringRef Constraint, + MVT VT) const override; bool isCheapToSpeculateCttz() const override; bool isCheapToSpeculateCtlz() const override; bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; + bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace, unsigned Align, + bool *Fast) const override; + bool isIntDivCheap(EVT VT, AttributeSet Attr) const override; SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const override; @@ -65,29 +68,31 @@ private: LLVMContext &Context) const override; SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, SDLoc dl, + const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl, SelectionDAG &DAG) const override; SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl<ISD::InputArg> &Ins, - SDLoc DL, SelectionDAG &DAG, + const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const override; // Custom lowering hooks. 
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerCopyToReg(SDValue Op, SelectionDAG &DAG) const; }; namespace WebAssembly { FastISel *createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo); -} // end namespace WebAssembly +} // end namespace WebAssembly -} // end namespace llvm +} // end namespace llvm #endif diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td index fda95953db8..444e275c6eb 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td @@ -16,12 +16,12 @@ let Defs = [ARGUMENTS] in { let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { // The condition operand is a boolean value which WebAssembly represents as i32. -def BR_IF : I<(outs), (ins I32:$cond, bb_op:$dst), +def BR_IF : I<(outs), (ins bb_op:$dst, I32:$cond), [(brcond I32:$cond, bb:$dst)], - "br_if \t$cond, $dst">; + "br_if \t$dst, $cond">; let isCodeGenOnly = 1 in -def BR_UNLESS : I<(outs), (ins I32:$cond, bb_op:$dst), [], - "br_unless\t$cond, $dst">; +def BR_UNLESS : I<(outs), (ins bb_op:$dst, I32:$cond), [], + "br_unless\t$dst, $cond">; let isBarrier = 1 in { def BR : I<(outs), (ins bb_op:$dst), [(br bb:$dst)], @@ -32,27 +32,27 @@ def BR : I<(outs), (ins bb_op:$dst), } // Defs = [ARGUMENTS] def : Pat<(brcond (i32 (setne I32:$cond, 0)), bb:$dst), - (BR_IF I32:$cond, bb_op:$dst)>; + (BR_IF bb_op:$dst, I32:$cond)>; def : Pat<(brcond (i32 (seteq I32:$cond, 0)), bb:$dst), - (BR_UNLESS I32:$cond, bb_op:$dst)>; + (BR_UNLESS bb_op:$dst, I32:$cond)>; let Defs = [ARGUMENTS] in { // TODO: SelectionDAG's lowering insists on using a pointer as the index for -// jump tables, so in practice we don't ever use TABLESWITCH_I64 in wasm32 mode +// jump tables, so in practice we don't ever use BR_TABLE_I64 in wasm32 mode // currently. // Set TSFlags{0} to 1 to indicate that the variable_ops are immediates. // Set TSFlags{1} to 1 to indicate that the immediates represent labels. 
let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in { -def TABLESWITCH_I32 : I<(outs), (ins I32:$index, bb_op:$default, variable_ops), - [(WebAssemblytableswitch I32:$index, bb:$default)], - "tableswitch\t$index, $default"> { +def BR_TABLE_I32 : I<(outs), (ins I32:$index, variable_ops), + [(WebAssemblybr_table I32:$index)], + "br_table \t$index"> { let TSFlags{0} = 1; let TSFlags{1} = 1; } -def TABLESWITCH_I64 : I<(outs), (ins I64:$index, bb_op:$default, variable_ops), - [(WebAssemblytableswitch I64:$index, bb:$default)], - "tableswitch\t$index, $default"> { +def BR_TABLE_I64 : I<(outs), (ins I64:$index, variable_ops), + [(WebAssemblybr_table I64:$index)], + "br_table \t$index"> { let TSFlags{0} = 1; let TSFlags{1} = 1; } @@ -71,6 +71,10 @@ def END_LOOP : I<(outs), (ins), [], "end_loop">; multiclass RETURN<WebAssemblyRegClass vt> { def RETURN_#vt : I<(outs), (ins vt:$val), [(WebAssemblyreturn vt:$val)], "return \t$val">; + // Equivalent to RETURN_#vt, for use at the end of a function when wasm + // semantics return by falling off the end of the block. + let isCodeGenOnly = 1 in + def FALLTHROUGH_RETURN_#vt : I<(outs), (ins vt:$val), []>; } let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in { @@ -80,6 +84,10 @@ let isReturn = 1 in { defm : RETURN<F32>; defm : RETURN<F64>; def RETURN_VOID : I<(outs), (ins), [(WebAssemblyreturn)], "return">; + + // This is to RETURN_VOID what FALLTHROUGH_RETURN_#vt is to RETURN_#vt. + let isCodeGenOnly = 1 in + def FALLTHROUGH_RETURN_VOID : I<(outs), (ins), []>; } // isReturn = 1 def UNREACHABLE : I<(outs), (ins), [(trap)], "unreachable">; } // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td index 5520c6de673..64569720375 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td @@ -77,12 +77,12 @@ def : Pat<(setge f64:$lhs, f64:$rhs), (GE_F64 f64:$lhs, f64:$rhs)>; let Defs = [ARGUMENTS] in { -def SELECT_F32 : I<(outs F32:$dst), (ins I32:$cond, F32:$lhs, F32:$rhs), +def SELECT_F32 : I<(outs F32:$dst), (ins F32:$lhs, F32:$rhs, I32:$cond), [(set F32:$dst, (select I32:$cond, F32:$lhs, F32:$rhs))], - "f32.select\t$dst, $cond, $lhs, $rhs">; -def SELECT_F64 : I<(outs F64:$dst), (ins I32:$cond, F64:$lhs, F64:$rhs), + "f32.select\t$dst, $lhs, $rhs, $cond">; +def SELECT_F64 : I<(outs F64:$dst), (ins F64:$lhs, F64:$rhs, I32:$cond), [(set F64:$dst, (select I32:$cond, F64:$lhs, F64:$rhs))], - "f64.select\t$dst, $cond, $lhs, $rhs">; + "f64.select\t$dst, $lhs, $rhs, $cond">; } // Defs = [ARGUMENTS] @@ -90,12 +90,12 @@ def SELECT_F64 : I<(outs F64:$dst), (ins I32:$cond, F64:$lhs, F64:$rhs), // WebAssembly's select interprets any non-zero value as true, so we can fold // a setne with 0 into a select. def : Pat<(select (i32 (setne I32:$cond, 0)), F32:$lhs, F32:$rhs), - (SELECT_F32 I32:$cond, F32:$lhs, F32:$rhs)>; + (SELECT_F32 F32:$lhs, F32:$rhs, I32:$cond)>; def : Pat<(select (i32 (setne I32:$cond, 0)), F64:$lhs, F64:$rhs), - (SELECT_F64 I32:$cond, F64:$lhs, F64:$rhs)>; + (SELECT_F64 F64:$lhs, F64:$rhs, I32:$cond)>; // And again, this time with seteq instead of setne and the arms reversed. 
def : Pat<(select (i32 (seteq I32:$cond, 0)), F32:$lhs, F32:$rhs), - (SELECT_F32 I32:$cond, F32:$rhs, F32:$lhs)>; + (SELECT_F32 F32:$rhs, F32:$lhs, I32:$cond)>; def : Pat<(select (i32 (seteq I32:$cond, 0)), F64:$lhs, F64:$rhs), - (SELECT_F64 I32:$cond, F64:$rhs, F64:$lhs)>; + (SELECT_F64 F64:$rhs, F64:$lhs, I32:$cond)>; diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp index 028e9af0834..2fd3eab99d7 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -15,6 +15,7 @@ #include "WebAssemblyInstrInfo.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -32,16 +33,32 @@ WebAssemblyInstrInfo::WebAssemblyInstrInfo(const WebAssemblySubtarget &STI) WebAssembly::ADJCALLSTACKUP), RI(STI.getTargetTriple()) {} +bool WebAssemblyInstrInfo::isReallyTriviallyReMaterializable( + const MachineInstr &MI, AliasAnalysis *AA) const { + switch (MI.getOpcode()) { + case WebAssembly::CONST_I32: + case WebAssembly::CONST_I64: + case WebAssembly::CONST_F32: + case WebAssembly::CONST_F64: + // isReallyTriviallyReMaterializableGeneric misses these because of the + // ARGUMENTS implicit def, so we manualy override it here. + return true; + default: + return false; + } +} + void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - DebugLoc DL, unsigned DestReg, + const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { // This method is called by post-RA expansion, which expects only pregs to // exist. However we need to handle both here. auto &MRI = MBB.getParent()->getRegInfo(); - const TargetRegisterClass *RC = TargetRegisterInfo::isVirtualRegister(DestReg) ? - MRI.getRegClass(DestReg) : - MRI.getTargetRegisterInfo()->getMinimalPhysRegClass(SrcReg); + const TargetRegisterClass *RC = + TargetRegisterInfo::isVirtualRegister(DestReg) + ? MRI.getRegClass(DestReg) + : MRI.getTargetRegisterInfo()->getMinimalPhysRegClass(DestReg); unsigned CopyLocalOpcode; if (RC == &WebAssembly::I32RegClass) @@ -59,8 +76,23 @@ void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addReg(SrcReg, KillSrc ? RegState::Kill : 0); } +MachineInstr * +WebAssemblyInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, + unsigned OpIdx1, + unsigned OpIdx2) const { + // If the operands are stackified, we can't reorder them. + WebAssemblyFunctionInfo &MFI = + *MI.getParent()->getParent()->getInfo<WebAssemblyFunctionInfo>(); + if (MFI.isVRegStackified(MI.getOperand(OpIdx1).getReg()) || + MFI.isVRegStackified(MI.getOperand(OpIdx2).getReg())) + return nullptr; + + // Otherwise use the default implementation. + return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); +} + // Branch analysis. -bool WebAssemblyInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, +bool WebAssemblyInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, @@ -75,22 +107,22 @@ bool WebAssemblyInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, if (HaveCond) return true; // If we're running after CFGStackify, we can't optimize further. 
- if (!MI.getOperand(1).isMBB()) + if (!MI.getOperand(0).isMBB()) return true; Cond.push_back(MachineOperand::CreateImm(true)); - Cond.push_back(MI.getOperand(0)); - TBB = MI.getOperand(1).getMBB(); + Cond.push_back(MI.getOperand(1)); + TBB = MI.getOperand(0).getMBB(); HaveCond = true; break; case WebAssembly::BR_UNLESS: if (HaveCond) return true; // If we're running after CFGStackify, we can't optimize further. - if (!MI.getOperand(1).isMBB()) + if (!MI.getOperand(0).isMBB()) return true; Cond.push_back(MachineOperand::CreateImm(false)); - Cond.push_back(MI.getOperand(0)); - TBB = MI.getOperand(1).getMBB(); + Cond.push_back(MI.getOperand(1)); + TBB = MI.getOperand(0).getMBB(); HaveCond = true; break; case WebAssembly::BR: @@ -133,7 +165,7 @@ unsigned WebAssemblyInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, - DebugLoc DL) const { + const DebugLoc &DL) const { if (Cond.empty()) { if (!TBB) return 0; @@ -145,13 +177,11 @@ unsigned WebAssemblyInstrInfo::InsertBranch(MachineBasicBlock &MBB, assert(Cond.size() == 2 && "Expected a flag and a successor block"); if (Cond[0].getImm()) { - BuildMI(&MBB, DL, get(WebAssembly::BR_IF)) - .addOperand(Cond[1]) - .addMBB(TBB); + BuildMI(&MBB, DL, get(WebAssembly::BR_IF)).addMBB(TBB).addOperand(Cond[1]); } else { BuildMI(&MBB, DL, get(WebAssembly::BR_UNLESS)) - .addOperand(Cond[1]) - .addMBB(TBB); + .addMBB(TBB) + .addOperand(Cond[1]); } if (!FBB) return 1; diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h index 5ddd9b36f24..d93f958ca4c 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h @@ -34,18 +34,24 @@ public: const WebAssemblyRegisterInfo &getRegisterInfo() const { return RI; } + bool isReallyTriviallyReMaterializable(const MachineInstr &MI, + AliasAnalysis *AA) const override; + void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - DebugLoc DL, unsigned DestReg, unsigned SrcReg, + const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const override; + MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI, + unsigned OpIdx1, + unsigned OpIdx2) const override; - bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify = false) const override; unsigned RemoveBranch(MachineBasicBlock &MBB) const override; unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond, - DebugLoc DL) const override; + const DebugLoc &DL) const override; bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override; }; diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index 2e682a47547..4b319871cf1 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -30,7 +30,7 @@ def SDT_WebAssemblyCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>; def SDT_WebAssemblyCall0 : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; def SDT_WebAssemblyCall1 : SDTypeProfile<1, -1, [SDTCisPtrTy<1>]>; -def SDT_WebAssemblyTableswitch : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; +def SDT_WebAssemblyBrTable : SDTypeProfile<0, -1, 
[SDTCisPtrTy<0>]>; def SDT_WebAssemblyArgument : SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>; def SDT_WebAssemblyReturn : SDTypeProfile<0, -1, []>; def SDT_WebAssemblyWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, @@ -52,9 +52,9 @@ def WebAssemblycall0 : SDNode<"WebAssemblyISD::CALL0", def WebAssemblycall1 : SDNode<"WebAssemblyISD::CALL1", SDT_WebAssemblyCall1, [SDNPHasChain, SDNPVariadic]>; -def WebAssemblytableswitch : SDNode<"WebAssemblyISD::TABLESWITCH", - SDT_WebAssemblyTableswitch, - [SDNPHasChain, SDNPVariadic]>; +def WebAssemblybr_table : SDNode<"WebAssemblyISD::BR_TABLE", + SDT_WebAssemblyBrTable, + [SDNPHasChain, SDNPVariadic]>; def WebAssemblyargument : SDNode<"WebAssemblyISD::ARGUMENT", SDT_WebAssemblyArgument>; def WebAssemblyreturn : SDNode<"WebAssemblyISD::RETURN", @@ -71,10 +71,17 @@ let OperandNamespace = "WebAssembly" in { let OperandType = "OPERAND_BASIC_BLOCK" in def bb_op : Operand<OtherVT>; -let OperandType = "OPERAND_FPIMM" in { +let OperandType = "OPERAND_FP32IMM" in def f32imm_op : Operand<f32>; + +let OperandType = "OPERAND_FP64IMM" in def f64imm_op : Operand<f64>; -} // OperandType = "OPERAND_FPIMM" + +let OperandType = "OPERAND_P2ALIGN" in { +def P2Align : Operand<i32> { + let PrintMethod = "printWebAssemblyP2AlignOperand"; +} +} // OperandType = "OPERAND_P2ALIGN" } // OperandNamespace = "WebAssembly" @@ -101,15 +108,9 @@ defm : ARGUMENT<F64>; let Defs = [ARGUMENTS] in { // get_local and set_local are not generated by instruction selection; they -// are implied by virtual register uses and defs in most contexts. However, -// they are explicitly emitted for special purposes. +// are implied by virtual register uses and defs. multiclass LOCAL<WebAssemblyRegClass vt> { - def GET_LOCAL_#vt : I<(outs vt:$res), (ins i32imm:$regno), [], - "get_local\t$res, $regno">; - // TODO: set_local returns its operand value - def SET_LOCAL_#vt : I<(outs), (ins i32imm:$regno, vt:$src), [], - "set_local\t$regno, $src">; - +let hasSideEffects = 0 in { // COPY_LOCAL is not an actual instruction in wasm, but since we allow // get_local and set_local to be implicit, we can have a COPY_LOCAL which // is actually a no-op because all the work is done in the implied @@ -117,13 +118,21 @@ multiclass LOCAL<WebAssemblyRegClass vt> { let isAsCheapAsAMove = 1 in def COPY_LOCAL_#vt : I<(outs vt:$res), (ins vt:$src), [], "copy_local\t$res, $src">; + + // TEE_LOCAL is similar to COPY_LOCAL, but writes two copies of its result. + // Typically this would be used to stackify one result and write the other + // result to a local. 
+ let isAsCheapAsAMove = 1 in + def TEE_LOCAL_#vt : I<(outs vt:$res, vt:$also), (ins vt:$src), [], + "tee_local\t$res, $also, $src">; +} // hasSideEffects = 0 } defm : LOCAL<I32>; defm : LOCAL<I64>; defm : LOCAL<F32>; defm : LOCAL<F64>; -let isMoveImm = 1 in { +let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1 in { def CONST_I32 : I<(outs I32:$res), (ins i32imm:$imm), [(set I32:$res, imm:$imm)], "i32.const\t$res, $imm">; @@ -136,7 +145,7 @@ def CONST_F32 : I<(outs F32:$res), (ins f32imm_op:$imm), def CONST_F64 : I<(outs F64:$res), (ins f64imm_op:$imm), [(set F64:$res, fpimm:$imm)], "f64.const\t$res, $imm">; -} // isMoveImm = 1 +} // isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1 } // Defs = [ARGUMENTS] diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td index 09e5eafb85e..7eaa57bb217 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td @@ -36,6 +36,8 @@ defm XOR : BinaryInt<xor, "xor ">; defm SHL : BinaryInt<shl, "shl ">; defm SHR_U : BinaryInt<srl, "shr_u">; defm SHR_S : BinaryInt<sra, "shr_s">; +defm ROTL : BinaryInt<rotl, "rotl">; +defm ROTR : BinaryInt<rotr, "rotr">; let isCommutable = 1 in { defm EQ : ComparisonInt<SETEQ, "eq ">; @@ -54,22 +56,29 @@ defm CLZ : UnaryInt<ctlz, "clz ">; defm CTZ : UnaryInt<cttz, "ctz ">; defm POPCNT : UnaryInt<ctpop, "popcnt">; +def EQZ_I32 : I<(outs I32:$dst), (ins I32:$src), + [(set I32:$dst, (setcc I32:$src, 0, SETEQ))], + "i32.eqz \t$dst, $src">; +def EQZ_I64 : I<(outs I32:$dst), (ins I64:$src), + [(set I32:$dst, (setcc I64:$src, 0, SETEQ))], + "i64.eqz \t$dst, $src">; + } // Defs = [ARGUMENTS] -// Expand the "don't care" operations to supported operations. -def : Pat<(ctlz_zero_undef I32:$src), (CLZ_I32 I32:$src)>; -def : Pat<(ctlz_zero_undef I64:$src), (CLZ_I64 I64:$src)>; -def : Pat<(cttz_zero_undef I32:$src), (CTZ_I32 I32:$src)>; -def : Pat<(cttz_zero_undef I64:$src), (CTZ_I64 I64:$src)>; +// Optimize away an explicit mask on a rotate count. +def : Pat<(rotl I32:$lhs, (and I32:$rhs, 31)), (ROTL_I32 I32:$lhs, I32:$rhs)>; +def : Pat<(rotr I32:$lhs, (and I32:$rhs, 31)), (ROTR_I32 I32:$lhs, I32:$rhs)>; +def : Pat<(rotl I64:$lhs, (and I64:$rhs, 63)), (ROTL_I64 I64:$lhs, I64:$rhs)>; +def : Pat<(rotr I64:$lhs, (and I64:$rhs, 63)), (ROTR_I64 I64:$lhs, I64:$rhs)>; let Defs = [ARGUMENTS] in { -def SELECT_I32 : I<(outs I32:$dst), (ins I32:$cond, I32:$lhs, I32:$rhs), +def SELECT_I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs, I32:$cond), [(set I32:$dst, (select I32:$cond, I32:$lhs, I32:$rhs))], - "i32.select\t$dst, $cond, $lhs, $rhs">; -def SELECT_I64 : I<(outs I64:$dst), (ins I32:$cond, I64:$lhs, I64:$rhs), + "i32.select\t$dst, $lhs, $rhs, $cond">; +def SELECT_I64 : I<(outs I64:$dst), (ins I64:$lhs, I64:$rhs, I32:$cond), [(set I64:$dst, (select I32:$cond, I64:$lhs, I64:$rhs))], - "i64.select\t$dst, $cond, $lhs, $rhs">; + "i64.select\t$dst, $lhs, $rhs, $cond">; } // Defs = [ARGUMENTS] @@ -77,12 +86,12 @@ def SELECT_I64 : I<(outs I64:$dst), (ins I32:$cond, I64:$lhs, I64:$rhs), // WebAssembly's select interprets any non-zero value as true, so we can fold // a setne with 0 into a select. 
def : Pat<(select (i32 (setne I32:$cond, 0)), I32:$lhs, I32:$rhs), - (SELECT_I32 I32:$cond, I32:$lhs, I32:$rhs)>; + (SELECT_I32 I32:$lhs, I32:$rhs, I32:$cond)>; def : Pat<(select (i32 (setne I32:$cond, 0)), I64:$lhs, I64:$rhs), - (SELECT_I64 I32:$cond, I64:$lhs, I64:$rhs)>; + (SELECT_I64 I64:$lhs, I64:$rhs, I32:$cond)>; // And again, this time with seteq instead of setne and the arms reversed. def : Pat<(select (i32 (seteq I32:$cond, 0)), I32:$lhs, I32:$rhs), - (SELECT_I32 I32:$cond, I32:$rhs, I32:$lhs)>; + (SELECT_I32 I32:$rhs, I32:$lhs, I32:$cond)>; def : Pat<(select (i32 (seteq I32:$cond, 0)), I64:$lhs, I64:$rhs), - (SELECT_I64 I32:$cond, I64:$rhs, I64:$lhs)>; + (SELECT_I64 I64:$rhs, I64:$lhs, I32:$cond)>; diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td index b39ac5212f8..521c664ca4a 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -28,6 +28,18 @@ def regPlusImm : PatFrag<(ops node:$addr, node:$off), (add node:$addr, node:$off), [{ return N->getFlags()->hasNoUnsignedWrap(); }]>; +// Treat an 'or' node as an 'add' if the or'ed bits are known to be zero. +def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{ + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1))) + return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue()); + + APInt KnownZero0, KnownOne0; + CurDAG->computeKnownBits(N->getOperand(0), KnownZero0, KnownOne0, 0); + APInt KnownZero1, KnownOne1; + CurDAG->computeKnownBits(N->getOperand(1), KnownZero1, KnownOne1, 0); + return (~KnownZero0 & ~KnownZero1) == 0; +}]>; + // GlobalAddresses are conceptually unsigned values, so we can also fold them // into immediate values as long as their offsets are non-negative. def regPlusGA : PatFrag<(ops node:$addr, node:$off), @@ -46,325 +58,392 @@ def regPlusGA : PatFrag<(ops node:$addr, node:$off), let Defs = [ARGUMENTS] in { // Basic load. -def LOAD_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [], - "i32.load\t$dst, ${off}(${addr})">; -def LOAD_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], - "i64.load\t$dst, ${off}(${addr})">; -def LOAD_F32 : I<(outs F32:$dst), (ins i32imm:$off, I32:$addr), [], - "f32.load\t$dst, ${off}(${addr})">; -def LOAD_F64 : I<(outs F64:$dst), (ins i32imm:$off, I32:$addr), [], - "f64.load\t$dst, ${off}(${addr})">; +def LOAD_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align), [], + "i32.load\t$dst, ${off}(${addr})${p2align}">; +def LOAD_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align), [], + "i64.load\t$dst, ${off}(${addr})${p2align}">; +def LOAD_F32 : I<(outs F32:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align), [], + "f32.load\t$dst, ${off}(${addr})${p2align}">; +def LOAD_F64 : I<(outs F64:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align), [], + "f64.load\t$dst, ${off}(${addr})${p2align}">; } // Defs = [ARGUMENTS] // Select loads with no constant offset. 
-def : Pat<(i32 (load I32:$addr)), (LOAD_I32 0, $addr)>; -def : Pat<(i64 (load I32:$addr)), (LOAD_I64 0, $addr)>; -def : Pat<(f32 (load I32:$addr)), (LOAD_F32 0, $addr)>; -def : Pat<(f64 (load I32:$addr)), (LOAD_F64 0, $addr)>; +def : Pat<(i32 (load I32:$addr)), (LOAD_I32 0, $addr, 0)>; +def : Pat<(i64 (load I32:$addr)), (LOAD_I64 0, $addr, 0)>; +def : Pat<(f32 (load I32:$addr)), (LOAD_F32 0, $addr, 0)>; +def : Pat<(f64 (load I32:$addr)), (LOAD_F64 0, $addr, 0)>; // Select loads with a constant offset. def : Pat<(i32 (load (regPlusImm I32:$addr, imm:$off))), - (LOAD_I32 imm:$off, $addr)>; + (LOAD_I32 imm:$off, $addr, 0)>; def : Pat<(i64 (load (regPlusImm I32:$addr, imm:$off))), - (LOAD_I64 imm:$off, $addr)>; + (LOAD_I64 imm:$off, $addr, 0)>; def : Pat<(f32 (load (regPlusImm I32:$addr, imm:$off))), - (LOAD_F32 imm:$off, $addr)>; + (LOAD_F32 imm:$off, $addr, 0)>; def : Pat<(f64 (load (regPlusImm I32:$addr, imm:$off))), - (LOAD_F64 imm:$off, $addr)>; + (LOAD_F64 imm:$off, $addr, 0)>; +def : Pat<(i32 (load (or_is_add I32:$addr, imm:$off))), + (LOAD_I32 imm:$off, $addr, 0)>; +def : Pat<(i64 (load (or_is_add I32:$addr, imm:$off))), + (LOAD_I64 imm:$off, $addr, 0)>; +def : Pat<(f32 (load (or_is_add I32:$addr, imm:$off))), + (LOAD_F32 imm:$off, $addr, 0)>; +def : Pat<(f64 (load (or_is_add I32:$addr, imm:$off))), + (LOAD_F64 imm:$off, $addr, 0)>; def : Pat<(i32 (load (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD_I32 tglobaladdr:$off, $addr)>; + (LOAD_I32 tglobaladdr:$off, $addr, 0)>; def : Pat<(i64 (load (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD_I64 tglobaladdr:$off, $addr)>; + (LOAD_I64 tglobaladdr:$off, $addr, 0)>; def : Pat<(f32 (load (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD_F32 tglobaladdr:$off, $addr)>; + (LOAD_F32 tglobaladdr:$off, $addr, 0)>; def : Pat<(f64 (load (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD_F64 tglobaladdr:$off, $addr)>; + (LOAD_F64 tglobaladdr:$off, $addr, 0)>; def : Pat<(i32 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD_I32 texternalsym:$off, $addr)>; + (LOAD_I32 texternalsym:$off, $addr, 0)>; def : Pat<(i64 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD_I64 texternalsym:$off, $addr)>; + (LOAD_I64 texternalsym:$off, $addr, 0)>; def : Pat<(f32 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD_F32 texternalsym:$off, $addr)>; + (LOAD_F32 texternalsym:$off, $addr, 0)>; def : Pat<(f64 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD_F64 texternalsym:$off, $addr)>; + (LOAD_F64 texternalsym:$off, $addr, 0)>; // Select loads with just a constant offset. 
-def : Pat<(i32 (load imm:$off)), (LOAD_I32 imm:$off, (CONST_I32 0))>; -def : Pat<(i64 (load imm:$off)), (LOAD_I64 imm:$off, (CONST_I32 0))>; -def : Pat<(f32 (load imm:$off)), (LOAD_F32 imm:$off, (CONST_I32 0))>; -def : Pat<(f64 (load imm:$off)), (LOAD_F64 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (load imm:$off)), (LOAD_I32 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(i64 (load imm:$off)), (LOAD_I64 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(f32 (load imm:$off)), (LOAD_F32 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(f64 (load imm:$off)), (LOAD_F64 imm:$off, (CONST_I32 0), 0)>; def : Pat<(i32 (load (WebAssemblywrapper tglobaladdr:$off))), - (LOAD_I32 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD_I32 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (load (WebAssemblywrapper tglobaladdr:$off))), - (LOAD_I64 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(f32 (load (WebAssemblywrapper tglobaladdr:$off))), - (LOAD_F32 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD_F32 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(f64 (load (WebAssemblywrapper tglobaladdr:$off))), - (LOAD_F64 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD_F64 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i32 (load (WebAssemblywrapper texternalsym:$off))), - (LOAD_I32 texternalsym:$off, (CONST_I32 0))>; + (LOAD_I32 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (load (WebAssemblywrapper texternalsym:$off))), - (LOAD_I64 texternalsym:$off, (CONST_I32 0))>; + (LOAD_I64 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(f32 (load (WebAssemblywrapper texternalsym:$off))), - (LOAD_F32 texternalsym:$off, (CONST_I32 0))>; + (LOAD_F32 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(f64 (load (WebAssemblywrapper texternalsym:$off))), - (LOAD_F64 texternalsym:$off, (CONST_I32 0))>; + (LOAD_F64 texternalsym:$off, (CONST_I32 0), 0)>; let Defs = [ARGUMENTS] in { // Extending load. 
-def LOAD8_S_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [], - "i32.load8_s\t$dst, ${off}(${addr})">; -def LOAD8_U_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [], - "i32.load8_u\t$dst, ${off}(${addr})">; -def LOAD16_S_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [], - "i32.load16_s\t$dst, ${off}(${addr})">; -def LOAD16_U_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr), [], - "i32.load16_u\t$dst, ${off}(${addr})">; -def LOAD8_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], - "i64.load8_s\t$dst, ${off}(${addr})">; -def LOAD8_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], - "i64.load8_u\t$dst, ${off}(${addr})">; -def LOAD16_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], - "i64.load16_s\t$dst, ${off}(${addr})">; -def LOAD16_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], - "i64.load16_u\t$dst, ${off}(${addr})">; -def LOAD32_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], - "i64.load32_s\t$dst, ${off}(${addr})">; -def LOAD32_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr), [], - "i64.load32_u\t$dst, ${off}(${addr})">; +def LOAD8_S_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align), [], + "i32.load8_s\t$dst, ${off}(${addr})${p2align}">; +def LOAD8_U_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align), [], + "i32.load8_u\t$dst, ${off}(${addr})${p2align}">; +def LOAD16_S_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align), [], + "i32.load16_s\t$dst, ${off}(${addr})${p2align}">; +def LOAD16_U_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align), [], + "i32.load16_u\t$dst, ${off}(${addr})${p2align}">; +def LOAD8_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align), [], + "i64.load8_s\t$dst, ${off}(${addr})${p2align}">; +def LOAD8_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align), [], + "i64.load8_u\t$dst, ${off}(${addr})${p2align}">; +def LOAD16_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align), [], + "i64.load16_s\t$dst, ${off}(${addr})${p2align}">; +def LOAD16_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align), [], + "i64.load16_u\t$dst, ${off}(${addr})${p2align}">; +def LOAD32_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align), [], + "i64.load32_s\t$dst, ${off}(${addr})${p2align}">; +def LOAD32_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align), [], + "i64.load32_u\t$dst, ${off}(${addr})${p2align}">; } // Defs = [ARGUMENTS] // Select extending loads with no constant offset. 
-def : Pat<(i32 (sextloadi8 I32:$addr)), (LOAD8_S_I32 0, $addr)>; -def : Pat<(i32 (zextloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr)>; -def : Pat<(i32 (sextloadi16 I32:$addr)), (LOAD16_S_I32 0, $addr)>; -def : Pat<(i32 (zextloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr)>; -def : Pat<(i64 (sextloadi8 I32:$addr)), (LOAD8_S_I64 0, $addr)>; -def : Pat<(i64 (zextloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr)>; -def : Pat<(i64 (sextloadi16 I32:$addr)), (LOAD16_S_I64 0, $addr)>; -def : Pat<(i64 (zextloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr)>; -def : Pat<(i64 (sextloadi32 I32:$addr)), (LOAD32_S_I64 0, $addr)>; -def : Pat<(i64 (zextloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>; +def : Pat<(i32 (sextloadi8 I32:$addr)), (LOAD8_S_I32 0, $addr, 0)>; +def : Pat<(i32 (zextloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr, 0)>; +def : Pat<(i32 (sextloadi16 I32:$addr)), (LOAD16_S_I32 0, $addr, 0)>; +def : Pat<(i32 (zextloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr, 0)>; +def : Pat<(i64 (sextloadi8 I32:$addr)), (LOAD8_S_I64 0, $addr, 0)>; +def : Pat<(i64 (zextloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr, 0)>; +def : Pat<(i64 (sextloadi16 I32:$addr)), (LOAD16_S_I64 0, $addr, 0)>; +def : Pat<(i64 (zextloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr, 0)>; +def : Pat<(i64 (sextloadi32 I32:$addr)), (LOAD32_S_I64 0, $addr, 0)>; +def : Pat<(i64 (zextloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr, 0)>; // Select extending loads with a constant offset. def : Pat<(i32 (sextloadi8 (regPlusImm I32:$addr, imm:$off))), - (LOAD8_S_I32 imm:$off, $addr)>; + (LOAD8_S_I32 imm:$off, $addr, 0)>; def : Pat<(i32 (zextloadi8 (regPlusImm I32:$addr, imm:$off))), - (LOAD8_U_I32 imm:$off, $addr)>; + (LOAD8_U_I32 imm:$off, $addr, 0)>; def : Pat<(i32 (sextloadi16 (regPlusImm I32:$addr, imm:$off))), - (LOAD16_S_I32 imm:$off, $addr)>; + (LOAD16_S_I32 imm:$off, $addr, 0)>; def : Pat<(i32 (zextloadi16 (regPlusImm I32:$addr, imm:$off))), - (LOAD16_U_I32 imm:$off, $addr)>; + (LOAD16_U_I32 imm:$off, $addr, 0)>; def : Pat<(i64 (sextloadi8 (regPlusImm I32:$addr, imm:$off))), - (LOAD8_S_I64 imm:$off, $addr)>; + (LOAD8_S_I64 imm:$off, $addr, 0)>; def : Pat<(i64 (zextloadi8 (regPlusImm I32:$addr, imm:$off))), - (LOAD8_U_I64 imm:$off, $addr)>; + (LOAD8_U_I64 imm:$off, $addr, 0)>; def : Pat<(i64 (sextloadi16 (regPlusImm I32:$addr, imm:$off))), - (LOAD16_S_I64 imm:$off, $addr)>; + (LOAD16_S_I64 imm:$off, $addr, 0)>; def : Pat<(i64 (zextloadi16 (regPlusImm I32:$addr, imm:$off))), - (LOAD16_U_I64 imm:$off, $addr)>; + (LOAD16_U_I64 imm:$off, $addr, 0)>; def : Pat<(i64 (sextloadi32 (regPlusImm I32:$addr, imm:$off))), - (LOAD32_S_I64 imm:$off, $addr)>; + (LOAD32_S_I64 imm:$off, $addr, 0)>; def : Pat<(i64 (zextloadi32 (regPlusImm I32:$addr, imm:$off))), - (LOAD32_U_I64 imm:$off, $addr)>; + (LOAD32_U_I64 imm:$off, $addr, 0)>; +def : Pat<(i32 (sextloadi8 (or_is_add I32:$addr, imm:$off))), + (LOAD8_S_I32 imm:$off, $addr, 0)>; +def : Pat<(i32 (zextloadi8 (or_is_add I32:$addr, imm:$off))), + (LOAD8_U_I32 imm:$off, $addr, 0)>; +def : Pat<(i32 (sextloadi16 (or_is_add I32:$addr, imm:$off))), + (LOAD16_S_I32 imm:$off, $addr, 0)>; +def : Pat<(i32 (zextloadi16 (or_is_add I32:$addr, imm:$off))), + (LOAD16_U_I32 imm:$off, $addr, 0)>; +def : Pat<(i64 (sextloadi8 (or_is_add I32:$addr, imm:$off))), + (LOAD8_S_I64 imm:$off, $addr, 0)>; +def : Pat<(i64 (zextloadi8 (or_is_add I32:$addr, imm:$off))), + (LOAD8_U_I64 imm:$off, $addr, 0)>; +def : Pat<(i64 (sextloadi16 (or_is_add I32:$addr, imm:$off))), + (LOAD16_S_I64 imm:$off, $addr, 0)>; +def : Pat<(i64 (zextloadi16 (or_is_add I32:$addr, imm:$off))), + 
(LOAD16_U_I64 imm:$off, $addr, 0)>; +def : Pat<(i64 (sextloadi32 (or_is_add I32:$addr, imm:$off))), + (LOAD32_S_I64 imm:$off, $addr, 0)>; +def : Pat<(i64 (zextloadi32 (or_is_add I32:$addr, imm:$off))), + (LOAD32_U_I64 imm:$off, $addr, 0)>; def : Pat<(i32 (sextloadi8 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD8_S_I32 tglobaladdr:$off, $addr)>; + (LOAD8_S_I32 tglobaladdr:$off, $addr, 0)>; def : Pat<(i32 (zextloadi8 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD8_U_I32 tglobaladdr:$off, $addr)>; + (LOAD8_U_I32 tglobaladdr:$off, $addr, 0)>; def : Pat<(i32 (sextloadi16 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD16_S_I32 tglobaladdr:$off, $addr)>; + (LOAD16_S_I32 tglobaladdr:$off, $addr, 0)>; def : Pat<(i32 (zextloadi16 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD16_U_I32 tglobaladdr:$off, $addr)>; + (LOAD16_U_I32 tglobaladdr:$off, $addr, 0)>; def : Pat<(i64 (sextloadi8 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD8_S_I64 tglobaladdr:$off, $addr)>; + (LOAD8_S_I64 tglobaladdr:$off, $addr, 0)>; def : Pat<(i64 (zextloadi8 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD8_U_I64 tglobaladdr:$off, $addr)>; + (LOAD8_U_I64 tglobaladdr:$off, $addr, 0)>; def : Pat<(i64 (sextloadi16 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD16_S_I64 tglobaladdr:$off, $addr)>; + (LOAD16_S_I64 tglobaladdr:$off, $addr, 0)>; def : Pat<(i64 (zextloadi16 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD16_U_I64 tglobaladdr:$off, $addr)>; + (LOAD16_U_I64 tglobaladdr:$off, $addr, 0)>; def : Pat<(i64 (sextloadi32 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD32_S_I64 tglobaladdr:$off, $addr)>; + (LOAD32_S_I64 tglobaladdr:$off, $addr, 0)>; def : Pat<(i64 (zextloadi32 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD32_U_I64 tglobaladdr:$off, $addr)>; + (LOAD32_U_I64 tglobaladdr:$off, $addr, 0)>; def : Pat<(i32 (sextloadi8 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD8_S_I32 texternalsym:$off, $addr)>; + (LOAD8_S_I32 texternalsym:$off, $addr, 0)>; def : Pat<(i32 (zextloadi8 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD8_U_I32 texternalsym:$off, $addr)>; + (LOAD8_U_I32 texternalsym:$off, $addr, 0)>; def : Pat<(i32 (sextloadi16 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD16_S_I32 texternalsym:$off, $addr)>; + (LOAD16_S_I32 texternalsym:$off, $addr, 0)>; def : Pat<(i32 (zextloadi16 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD16_U_I32 texternalsym:$off, $addr)>; + (LOAD16_U_I32 texternalsym:$off, $addr, 0)>; def : Pat<(i64 (sextloadi8 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD8_S_I64 texternalsym:$off, $addr)>; + (LOAD8_S_I64 texternalsym:$off, $addr, 0)>; def : Pat<(i64 (zextloadi8 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD8_U_I64 texternalsym:$off, $addr)>; + (LOAD8_U_I64 texternalsym:$off, $addr, 0)>; def : Pat<(i64 (sextloadi16 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD16_S_I64 texternalsym:$off, $addr)>; + (LOAD16_S_I64 texternalsym:$off, $addr, 0)>; def : Pat<(i64 (zextloadi16 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD16_U_I64 texternalsym:$off, $addr)>; + (LOAD16_U_I64 texternalsym:$off, $addr, 0)>; def : Pat<(i64 (sextloadi32 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD32_S_I64 
texternalsym:$off, $addr)>; + (LOAD32_S_I64 texternalsym:$off, $addr, 0)>; def : Pat<(i64 (zextloadi32 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD32_U_I64 texternalsym:$off, $addr)>; + (LOAD32_U_I64 texternalsym:$off, $addr, 0)>; // Select extending loads with just a constant offset. -def : Pat<(i32 (sextloadi8 imm:$off)), (LOAD8_S_I32 imm:$off, (CONST_I32 0))>; -def : Pat<(i32 (zextloadi8 imm:$off)), (LOAD8_U_I32 imm:$off, (CONST_I32 0))>; -def : Pat<(i32 (sextloadi16 imm:$off)), (LOAD16_S_I32 imm:$off, (CONST_I32 0))>; -def : Pat<(i32 (zextloadi16 imm:$off)), (LOAD16_U_I32 imm:$off, (CONST_I32 0))>; -def : Pat<(i64 (sextloadi8 imm:$off)), (LOAD8_S_I64 imm:$off, (CONST_I32 0))>; -def : Pat<(i64 (zextloadi8 imm:$off)), (LOAD8_U_I64 imm:$off, (CONST_I32 0))>; -def : Pat<(i64 (sextloadi16 imm:$off)), (LOAD16_S_I64 imm:$off, (CONST_I32 0))>; -def : Pat<(i64 (zextloadi16 imm:$off)), (LOAD16_U_I64 imm:$off, (CONST_I32 0))>; -def : Pat<(i64 (sextloadi32 imm:$off)), (LOAD32_S_I64 imm:$off, (CONST_I32 0))>; -def : Pat<(i64 (zextloadi32 imm:$off)), (LOAD32_U_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (sextloadi8 imm:$off)), + (LOAD8_S_I32 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(i32 (zextloadi8 imm:$off)), + (LOAD8_U_I32 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(i32 (sextloadi16 imm:$off)), + (LOAD16_S_I32 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(i32 (zextloadi16 imm:$off)), + (LOAD16_U_I32 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(i64 (sextloadi8 imm:$off)), + (LOAD8_S_I64 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(i64 (zextloadi8 imm:$off)), + (LOAD8_U_I64 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(i64 (sextloadi16 imm:$off)), + (LOAD16_S_I64 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(i64 (zextloadi16 imm:$off)), + (LOAD16_U_I64 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(i64 (sextloadi32 imm:$off)), + (LOAD32_S_I64 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(i64 (zextloadi32 imm:$off)), + (LOAD32_U_I64 imm:$off, (CONST_I32 0), 0)>; def : Pat<(i32 (sextloadi8 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD8_S_I32 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD8_S_I32 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i32 (zextloadi8 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i32 (sextloadi16 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD16_S_I32 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD16_S_I32 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i32 (zextloadi16 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (sextloadi8 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD8_S_I64 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD8_S_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (zextloadi8 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (sextloadi16 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD16_S_I64 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD16_S_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (zextloadi16 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (sextloadi32 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD32_S_I64 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD32_S_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i64 
(zextloadi32 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i32 (sextloadi8 (WebAssemblywrapper texternalsym:$off))), - (LOAD8_S_I32 texternalsym:$off, (CONST_I32 0))>; + (LOAD8_S_I32 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(i32 (zextloadi8 (WebAssemblywrapper texternalsym:$off))), - (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0))>; + (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(i32 (sextloadi16 (WebAssemblywrapper texternalsym:$off))), - (LOAD16_S_I32 texternalsym:$off, (CONST_I32 0))>; + (LOAD16_S_I32 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(i32 (zextloadi16 (WebAssemblywrapper texternalsym:$off))), - (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0))>; + (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (sextloadi8 (WebAssemblywrapper texternalsym:$off))), - (LOAD8_S_I64 texternalsym:$off, (CONST_I32 0))>; + (LOAD8_S_I64 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (zextloadi8 (WebAssemblywrapper texternalsym:$off))), - (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0))>; + (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (sextloadi16 (WebAssemblywrapper texternalsym:$off))), - (LOAD16_S_I64 texternalsym:$off, (CONST_I32 0))>; + (LOAD16_S_I64 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (zextloadi16 (WebAssemblywrapper texternalsym:$off))), - (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0))>; + (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (sextloadi32 (WebAssemblywrapper texternalsym:$off))), - (LOAD32_S_I64 texternalsym:$off, (CONST_I32 0))>; + (LOAD32_S_I64 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (zextloadi32 (WebAssemblywrapper texternalsym:$off))), - (LOAD32_U_I64 texternalsym:$off, (CONST_I32 0))>; + (LOAD32_U_I64 texternalsym:$off, (CONST_I32 0), 0)>; // Resolve "don't care" extending loads to zero-extending loads. This is // somewhat arbitrary, but zero-extending is conceptually simpler. // Select "don't care" extending loads with no constant offset. -def : Pat<(i32 (extloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr)>; -def : Pat<(i32 (extloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr)>; -def : Pat<(i64 (extloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr)>; -def : Pat<(i64 (extloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr)>; -def : Pat<(i64 (extloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>; +def : Pat<(i32 (extloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr, 0)>; +def : Pat<(i32 (extloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr, 0)>; +def : Pat<(i64 (extloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr, 0)>; +def : Pat<(i64 (extloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr, 0)>; +def : Pat<(i64 (extloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr, 0)>; // Select "don't care" extending loads with a constant offset. 
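(Aside on the "don't care" resolution noted above, before the constant-offset patterns that follow: an any-extending load leaves the upper bits unspecified, so filling them with zeros is as valid as any other choice and the simplest to reason about. A minimal C++ illustration of that fact, not backend code:

    #include <cstdint>

    // Only the low 8 bits of the loaded byte are significant to an
    // any-extending i8 load; choosing zero for the upper bits is one
    // legitimate resolution, and it is the one the patterns above pick.
    static uint32_t anyExtLoad8(uint8_t Byte) {
      return uint32_t(Byte); // upper 24 bits were "don't care", now zero
    }
)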
def : Pat<(i32 (extloadi8 (regPlusImm I32:$addr, imm:$off))), - (LOAD8_U_I32 imm:$off, $addr)>; + (LOAD8_U_I32 imm:$off, $addr, 0)>; def : Pat<(i32 (extloadi16 (regPlusImm I32:$addr, imm:$off))), - (LOAD16_U_I32 imm:$off, $addr)>; + (LOAD16_U_I32 imm:$off, $addr, 0)>; def : Pat<(i64 (extloadi8 (regPlusImm I32:$addr, imm:$off))), - (LOAD8_U_I64 imm:$off, $addr)>; + (LOAD8_U_I64 imm:$off, $addr, 0)>; def : Pat<(i64 (extloadi16 (regPlusImm I32:$addr, imm:$off))), - (LOAD16_U_I64 imm:$off, $addr)>; + (LOAD16_U_I64 imm:$off, $addr, 0)>; def : Pat<(i64 (extloadi32 (regPlusImm I32:$addr, imm:$off))), - (LOAD32_U_I64 imm:$off, $addr)>; + (LOAD32_U_I64 imm:$off, $addr, 0)>; +def : Pat<(i32 (extloadi8 (or_is_add I32:$addr, imm:$off))), + (LOAD8_U_I32 imm:$off, $addr, 0)>; +def : Pat<(i32 (extloadi16 (or_is_add I32:$addr, imm:$off))), + (LOAD16_U_I32 imm:$off, $addr, 0)>; +def : Pat<(i64 (extloadi8 (or_is_add I32:$addr, imm:$off))), + (LOAD8_U_I64 imm:$off, $addr, 0)>; +def : Pat<(i64 (extloadi16 (or_is_add I32:$addr, imm:$off))), + (LOAD16_U_I64 imm:$off, $addr, 0)>; +def : Pat<(i64 (extloadi32 (or_is_add I32:$addr, imm:$off))), + (LOAD32_U_I64 imm:$off, $addr, 0)>; def : Pat<(i32 (extloadi8 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD8_U_I32 tglobaladdr:$off, $addr)>; + (LOAD8_U_I32 tglobaladdr:$off, $addr, 0)>; def : Pat<(i32 (extloadi16 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD16_U_I32 tglobaladdr:$off, $addr)>; + (LOAD16_U_I32 tglobaladdr:$off, $addr, 0)>; def : Pat<(i64 (extloadi8 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD8_U_I64 tglobaladdr:$off, $addr)>; + (LOAD8_U_I64 tglobaladdr:$off, $addr, 0)>; def : Pat<(i64 (extloadi16 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD16_U_I64 tglobaladdr:$off, $addr)>; + (LOAD16_U_I64 tglobaladdr:$off, $addr, 0)>; def : Pat<(i64 (extloadi32 (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off)))), - (LOAD32_U_I64 tglobaladdr:$off, $addr)>; + (LOAD32_U_I64 tglobaladdr:$off, $addr, 0)>; def : Pat<(i32 (extloadi8 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD8_U_I32 texternalsym:$off, $addr)>; + (LOAD8_U_I32 texternalsym:$off, $addr, 0)>; def : Pat<(i32 (extloadi16 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD16_U_I32 texternalsym:$off, $addr)>; + (LOAD16_U_I32 texternalsym:$off, $addr, 0)>; def : Pat<(i64 (extloadi8 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD8_U_I64 texternalsym:$off, $addr)>; + (LOAD8_U_I64 texternalsym:$off, $addr, 0)>; def : Pat<(i64 (extloadi16 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD16_U_I64 texternalsym:$off, $addr)>; + (LOAD16_U_I64 texternalsym:$off, $addr, 0)>; def : Pat<(i64 (extloadi32 (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))), - (LOAD32_U_I64 texternalsym:$off, $addr)>; + (LOAD32_U_I64 texternalsym:$off, $addr, 0)>; // Select "don't care" extending loads with just a constant offset. 
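(Aside on the new or_is_add address patterns used above: an (or $addr, imm) can be selected exactly like (add $addr, imm) whenever the base and the immediate share no set bits, because then the OR cannot produce a carry. The in-tree predicate defers to the selection DAG's analysis of which bits are known zero; the sketch below only states the underlying arithmetic fact and is illustrative, not the real PatFrag:

    #include <cstdint>

    // If Base and Imm have no bits in common, Base | Imm == Base + Imm,
    // so folding the "or" into the load/store offset is sound.
    static bool orBehavesLikeAdd(uint32_t Base, uint32_t Imm) {
      return (Base & Imm) == 0;
    }
)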
-def : Pat<(i32 (extloadi8 imm:$off)), (LOAD8_U_I32 imm:$off, (CONST_I32 0))>; -def : Pat<(i32 (extloadi16 imm:$off)), (LOAD16_U_I32 imm:$off, (CONST_I32 0))>; -def : Pat<(i64 (extloadi8 imm:$off)), (LOAD8_U_I64 imm:$off, (CONST_I32 0))>; -def : Pat<(i64 (extloadi16 imm:$off)), (LOAD16_U_I64 imm:$off, (CONST_I32 0))>; -def : Pat<(i64 (extloadi32 imm:$off)), (LOAD32_U_I64 imm:$off, (CONST_I32 0))>; +def : Pat<(i32 (extloadi8 imm:$off)), + (LOAD8_U_I32 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(i32 (extloadi16 imm:$off)), + (LOAD16_U_I32 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(i64 (extloadi8 imm:$off)), + (LOAD8_U_I64 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(i64 (extloadi16 imm:$off)), + (LOAD16_U_I64 imm:$off, (CONST_I32 0), 0)>; +def : Pat<(i64 (extloadi32 imm:$off)), + (LOAD32_U_I64 imm:$off, (CONST_I32 0), 0)>; def : Pat<(i32 (extloadi8 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i32 (extloadi16 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (extloadi8 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (extloadi16 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (extloadi32 (WebAssemblywrapper tglobaladdr:$off))), - (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; def : Pat<(i32 (extloadi8 (WebAssemblywrapper texternalsym:$off))), - (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0))>; + (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(i32 (extloadi16 (WebAssemblywrapper texternalsym:$off))), - (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0))>; + (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (extloadi8 (WebAssemblywrapper texternalsym:$off))), - (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0))>; + (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (extloadi16 (WebAssemblywrapper texternalsym:$off))), - (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0))>; + (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0), 0)>; def : Pat<(i64 (extloadi32 (WebAssemblywrapper texternalsym:$off))), - (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0))>; + (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>; let Defs = [ARGUMENTS] in { @@ -374,205 +453,232 @@ let Defs = [ARGUMENTS] in { // instruction definition patterns that don't reference all of the output // operands. // Note: WebAssembly inverts SelectionDAG's usual operand order. 
-def STORE_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, I32:$val), [], - "i32.store\t$dst, ${off}(${addr}), $val">; -def STORE_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [], - "i64.store\t$dst, ${off}(${addr}), $val">; -def STORE_F32 : I<(outs F32:$dst), (ins i32imm:$off, I32:$addr, F32:$val), [], - "f32.store\t$dst, ${off}(${addr}), $val">; -def STORE_F64 : I<(outs F64:$dst), (ins i32imm:$off, I32:$addr, F64:$val), [], - "f64.store\t$dst, ${off}(${addr}), $val">; +def STORE_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align, I32:$val), [], + "i32.store\t$dst, ${off}(${addr})${p2align}, $val">; +def STORE_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align, I64:$val), [], + "i64.store\t$dst, ${off}(${addr})${p2align}, $val">; +def STORE_F32 : I<(outs F32:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align, F32:$val), [], + "f32.store\t$dst, ${off}(${addr})${p2align}, $val">; +def STORE_F64 : I<(outs F64:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align, F64:$val), [], + "f64.store\t$dst, ${off}(${addr})${p2align}, $val">; } // Defs = [ARGUMENTS] // Select stores with no constant offset. -def : Pat<(store I32:$val, I32:$addr), (STORE_I32 0, I32:$addr, I32:$val)>; -def : Pat<(store I64:$val, I32:$addr), (STORE_I64 0, I32:$addr, I64:$val)>; -def : Pat<(store F32:$val, I32:$addr), (STORE_F32 0, I32:$addr, F32:$val)>; -def : Pat<(store F64:$val, I32:$addr), (STORE_F64 0, I32:$addr, F64:$val)>; +def : Pat<(store I32:$val, I32:$addr), (STORE_I32 0, I32:$addr, 0, I32:$val)>; +def : Pat<(store I64:$val, I32:$addr), (STORE_I64 0, I32:$addr, 0, I64:$val)>; +def : Pat<(store F32:$val, I32:$addr), (STORE_F32 0, I32:$addr, 0, F32:$val)>; +def : Pat<(store F64:$val, I32:$addr), (STORE_F64 0, I32:$addr, 0, F64:$val)>; // Select stores with a constant offset. 
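(Aside on the P2Align:$p2align operand threaded through the store and load definitions above, and on the trailing 0 added to every selection pattern: the operand carries log2 of the access alignment, and the literal 0 written in the patterns is only a placeholder, presumably rewritten later from the instruction's memory operand by the alignment-setting pass added alongside this change. A small sketch of the encoding, assumption-level detail rather than the in-tree helper:

    #include <cassert>
    #include <cstdint>

    // Convert a byte alignment into the p2align (log2) form the operand uses.
    static unsigned p2AlignFromAlignment(uint64_t AlignInBytes) {
      assert(AlignInBytes != 0 && (AlignInBytes & (AlignInBytes - 1)) == 0 &&
             "alignment must be a nonzero power of two");
      unsigned P2 = 0;
      while ((uint64_t(1) << P2) < AlignInBytes)
        ++P2;
      return P2; // e.g. 4-byte alignment -> p2align of 2
    }
)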
def : Pat<(store I32:$val, (regPlusImm I32:$addr, imm:$off)), - (STORE_I32 imm:$off, I32:$addr, I32:$val)>; + (STORE_I32 imm:$off, I32:$addr, 0, I32:$val)>; def : Pat<(store I64:$val, (regPlusImm I32:$addr, imm:$off)), - (STORE_I64 imm:$off, I32:$addr, I64:$val)>; + (STORE_I64 imm:$off, I32:$addr, 0, I64:$val)>; def : Pat<(store F32:$val, (regPlusImm I32:$addr, imm:$off)), - (STORE_F32 imm:$off, I32:$addr, F32:$val)>; + (STORE_F32 imm:$off, I32:$addr, 0, F32:$val)>; def : Pat<(store F64:$val, (regPlusImm I32:$addr, imm:$off)), - (STORE_F64 imm:$off, I32:$addr, F64:$val)>; + (STORE_F64 imm:$off, I32:$addr, 0, F64:$val)>; +def : Pat<(store I32:$val, (or_is_add I32:$addr, imm:$off)), + (STORE_I32 imm:$off, I32:$addr, 0, I32:$val)>; +def : Pat<(store I64:$val, (or_is_add I32:$addr, imm:$off)), + (STORE_I64 imm:$off, I32:$addr, 0, I64:$val)>; +def : Pat<(store F32:$val, (or_is_add I32:$addr, imm:$off)), + (STORE_F32 imm:$off, I32:$addr, 0, F32:$val)>; +def : Pat<(store F64:$val, (or_is_add I32:$addr, imm:$off)), + (STORE_F64 imm:$off, I32:$addr, 0, F64:$val)>; def : Pat<(store I32:$val, (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))), - (STORE_I32 tglobaladdr:$off, I32:$addr, I32:$val)>; + (STORE_I32 tglobaladdr:$off, I32:$addr, 0, I32:$val)>; def : Pat<(store I64:$val, (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))), - (STORE_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; + (STORE_I64 tglobaladdr:$off, I32:$addr, 0, I64:$val)>; def : Pat<(store F32:$val, (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))), - (STORE_F32 tglobaladdr:$off, I32:$addr, F32:$val)>; + (STORE_F32 tglobaladdr:$off, I32:$addr, 0, F32:$val)>; def : Pat<(store F64:$val, (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))), - (STORE_F64 tglobaladdr:$off, I32:$addr, F64:$val)>; + (STORE_F64 tglobaladdr:$off, I32:$addr, 0, F64:$val)>; def : Pat<(store I32:$val, (add I32:$addr, (WebAssemblywrapper texternalsym:$off))), - (STORE_I32 texternalsym:$off, I32:$addr, I32:$val)>; + (STORE_I32 texternalsym:$off, I32:$addr, 0, I32:$val)>; def : Pat<(store I64:$val, (add I32:$addr, (WebAssemblywrapper texternalsym:$off))), - (STORE_I64 texternalsym:$off, I32:$addr, I64:$val)>; + (STORE_I64 texternalsym:$off, I32:$addr, 0, I64:$val)>; def : Pat<(store F32:$val, (add I32:$addr, (WebAssemblywrapper texternalsym:$off))), - (STORE_F32 texternalsym:$off, I32:$addr, F32:$val)>; + (STORE_F32 texternalsym:$off, I32:$addr, 0, F32:$val)>; def : Pat<(store F64:$val, (add I32:$addr, (WebAssemblywrapper texternalsym:$off))), - (STORE_F64 texternalsym:$off, I32:$addr, F64:$val)>; + (STORE_F64 texternalsym:$off, I32:$addr, 0, F64:$val)>; // Select stores with just a constant offset. 
def : Pat<(store I32:$val, imm:$off), - (STORE_I32 imm:$off, (CONST_I32 0), I32:$val)>; + (STORE_I32 imm:$off, (CONST_I32 0), 0, I32:$val)>; def : Pat<(store I64:$val, imm:$off), - (STORE_I64 imm:$off, (CONST_I32 0), I64:$val)>; + (STORE_I64 imm:$off, (CONST_I32 0), 0, I64:$val)>; def : Pat<(store F32:$val, imm:$off), - (STORE_F32 imm:$off, (CONST_I32 0), F32:$val)>; + (STORE_F32 imm:$off, (CONST_I32 0), 0, F32:$val)>; def : Pat<(store F64:$val, imm:$off), - (STORE_F64 imm:$off, (CONST_I32 0), F64:$val)>; + (STORE_F64 imm:$off, (CONST_I32 0), 0, F64:$val)>; def : Pat<(store I32:$val, (WebAssemblywrapper tglobaladdr:$off)), - (STORE_I32 tglobaladdr:$off, (CONST_I32 0), I32:$val)>; + (STORE_I32 tglobaladdr:$off, (CONST_I32 0), 0, I32:$val)>; def : Pat<(store I64:$val, (WebAssemblywrapper tglobaladdr:$off)), - (STORE_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>; + (STORE_I64 tglobaladdr:$off, (CONST_I32 0), 0, I64:$val)>; def : Pat<(store F32:$val, (WebAssemblywrapper tglobaladdr:$off)), - (STORE_F32 tglobaladdr:$off, (CONST_I32 0), F32:$val)>; + (STORE_F32 tglobaladdr:$off, (CONST_I32 0), 0, F32:$val)>; def : Pat<(store F64:$val, (WebAssemblywrapper tglobaladdr:$off)), - (STORE_F64 tglobaladdr:$off, (CONST_I32 0), F64:$val)>; + (STORE_F64 tglobaladdr:$off, (CONST_I32 0), 0, F64:$val)>; def : Pat<(store I32:$val, (WebAssemblywrapper texternalsym:$off)), - (STORE_I32 texternalsym:$off, (CONST_I32 0), I32:$val)>; + (STORE_I32 texternalsym:$off, (CONST_I32 0), 0, I32:$val)>; def : Pat<(store I64:$val, (WebAssemblywrapper texternalsym:$off)), - (STORE_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>; + (STORE_I64 texternalsym:$off, (CONST_I32 0), 0, I64:$val)>; def : Pat<(store F32:$val, (WebAssemblywrapper texternalsym:$off)), - (STORE_F32 texternalsym:$off, (CONST_I32 0), F32:$val)>; + (STORE_F32 texternalsym:$off, (CONST_I32 0), 0, F32:$val)>; def : Pat<(store F64:$val, (WebAssemblywrapper texternalsym:$off)), - (STORE_F64 texternalsym:$off, (CONST_I32 0), F64:$val)>; + (STORE_F64 texternalsym:$off, (CONST_I32 0), 0, F64:$val)>; let Defs = [ARGUMENTS] in { // Truncating store. 
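(Aside on the (CONST_I32 0) base used in the "just a constant offset" patterns above: a wasm load or store addresses base + offset, so an absolute constant address can ride entirely in the offset field with a zero base register. A sketch of the assumed addressing rule, for illustration only:

    #include <cstdint>

    // Effective address of a wasm memory access: base register plus the
    // immediate offset field, computed without 32-bit wraparound. With a
    // zero base, the offset alone names the byte.
    static uint64_t effectiveAddress(uint32_t Base, uint32_t Offset) {
      return uint64_t(Base) + uint64_t(Offset);
    }
)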
-def STORE8_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, I32:$val), [], - "i32.store8\t$dst, ${off}(${addr}), $val">; -def STORE16_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, I32:$val), [], - "i32.store16\t$dst, ${off}(${addr}), $val">; -def STORE8_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [], - "i64.store8\t$dst, ${off}(${addr}), $val">; -def STORE16_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [], - "i64.store16\t$dst, ${off}(${addr}), $val">; -def STORE32_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, I64:$val), [], - "i64.store32\t$dst, ${off}(${addr}), $val">; +def STORE8_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align, I32:$val), [], + "i32.store8\t$dst, ${off}(${addr})${p2align}, $val">; +def STORE16_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align, I32:$val), [], + "i32.store16\t$dst, ${off}(${addr})${p2align}, $val">; +def STORE8_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align, I64:$val), [], + "i64.store8\t$dst, ${off}(${addr})${p2align}, $val">; +def STORE16_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align, I64:$val), [], + "i64.store16\t$dst, ${off}(${addr})${p2align}, $val">; +def STORE32_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr, + P2Align:$p2align, I64:$val), [], + "i64.store32\t$dst, ${off}(${addr})${p2align}, $val">; } // Defs = [ARGUMENTS] // Select truncating stores with no constant offset. def : Pat<(truncstorei8 I32:$val, I32:$addr), - (STORE8_I32 0, I32:$addr, I32:$val)>; + (STORE8_I32 0, I32:$addr, 0, I32:$val)>; def : Pat<(truncstorei16 I32:$val, I32:$addr), - (STORE16_I32 0, I32:$addr, I32:$val)>; + (STORE16_I32 0, I32:$addr, 0, I32:$val)>; def : Pat<(truncstorei8 I64:$val, I32:$addr), - (STORE8_I64 0, I32:$addr, I64:$val)>; + (STORE8_I64 0, I32:$addr, 0, I64:$val)>; def : Pat<(truncstorei16 I64:$val, I32:$addr), - (STORE16_I64 0, I32:$addr, I64:$val)>; + (STORE16_I64 0, I32:$addr, 0, I64:$val)>; def : Pat<(truncstorei32 I64:$val, I32:$addr), - (STORE32_I64 0, I32:$addr, I64:$val)>; + (STORE32_I64 0, I32:$addr, 0, I64:$val)>; // Select truncating stores with a constant offset. 
def : Pat<(truncstorei8 I32:$val, (regPlusImm I32:$addr, imm:$off)), - (STORE8_I32 imm:$off, I32:$addr, I32:$val)>; + (STORE8_I32 imm:$off, I32:$addr, 0, I32:$val)>; def : Pat<(truncstorei16 I32:$val, (regPlusImm I32:$addr, imm:$off)), - (STORE16_I32 imm:$off, I32:$addr, I32:$val)>; + (STORE16_I32 imm:$off, I32:$addr, 0, I32:$val)>; def : Pat<(truncstorei8 I64:$val, (regPlusImm I32:$addr, imm:$off)), - (STORE8_I64 imm:$off, I32:$addr, I64:$val)>; + (STORE8_I64 imm:$off, I32:$addr, 0, I64:$val)>; def : Pat<(truncstorei16 I64:$val, (regPlusImm I32:$addr, imm:$off)), - (STORE16_I64 imm:$off, I32:$addr, I64:$val)>; + (STORE16_I64 imm:$off, I32:$addr, 0, I64:$val)>; def : Pat<(truncstorei32 I64:$val, (regPlusImm I32:$addr, imm:$off)), - (STORE32_I64 imm:$off, I32:$addr, I64:$val)>; + (STORE32_I64 imm:$off, I32:$addr, 0, I64:$val)>; +def : Pat<(truncstorei8 I32:$val, (or_is_add I32:$addr, imm:$off)), + (STORE8_I32 imm:$off, I32:$addr, 0, I32:$val)>; +def : Pat<(truncstorei16 I32:$val, (or_is_add I32:$addr, imm:$off)), + (STORE16_I32 imm:$off, I32:$addr, 0, I32:$val)>; +def : Pat<(truncstorei8 I64:$val, (or_is_add I32:$addr, imm:$off)), + (STORE8_I64 imm:$off, I32:$addr, 0, I64:$val)>; +def : Pat<(truncstorei16 I64:$val, (or_is_add I32:$addr, imm:$off)), + (STORE16_I64 imm:$off, I32:$addr, 0, I64:$val)>; +def : Pat<(truncstorei32 I64:$val, (or_is_add I32:$addr, imm:$off)), + (STORE32_I64 imm:$off, I32:$addr, 0, I64:$val)>; def : Pat<(truncstorei8 I32:$val, (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))), - (STORE8_I32 tglobaladdr:$off, I32:$addr, I32:$val)>; + (STORE8_I32 tglobaladdr:$off, I32:$addr, 0, I32:$val)>; def : Pat<(truncstorei16 I32:$val, (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))), - (STORE16_I32 tglobaladdr:$off, I32:$addr, I32:$val)>; + (STORE16_I32 tglobaladdr:$off, I32:$addr, 0, I32:$val)>; def : Pat<(truncstorei8 I64:$val, (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))), - (STORE8_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; + (STORE8_I64 tglobaladdr:$off, I32:$addr, 0, I64:$val)>; def : Pat<(truncstorei16 I64:$val, (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))), - (STORE16_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; + (STORE16_I64 tglobaladdr:$off, I32:$addr, 0, I64:$val)>; def : Pat<(truncstorei32 I64:$val, (regPlusGA I32:$addr, (WebAssemblywrapper tglobaladdr:$off))), - (STORE32_I64 tglobaladdr:$off, I32:$addr, I64:$val)>; + (STORE32_I64 tglobaladdr:$off, I32:$addr, 0, I64:$val)>; def : Pat<(truncstorei8 I32:$val, (add I32:$addr, (WebAssemblywrapper texternalsym:$off))), - (STORE8_I32 texternalsym:$off, I32:$addr, I32:$val)>; + (STORE8_I32 texternalsym:$off, I32:$addr, 0, I32:$val)>; def : Pat<(truncstorei16 I32:$val, (add I32:$addr, (WebAssemblywrapper texternalsym:$off))), - (STORE16_I32 texternalsym:$off, I32:$addr, I32:$val)>; + (STORE16_I32 texternalsym:$off, I32:$addr, 0, I32:$val)>; def : Pat<(truncstorei8 I64:$val, (add I32:$addr, (WebAssemblywrapper texternalsym:$off))), - (STORE8_I64 texternalsym:$off, I32:$addr, I64:$val)>; + (STORE8_I64 texternalsym:$off, I32:$addr, 0, I64:$val)>; def : Pat<(truncstorei16 I64:$val, (add I32:$addr, (WebAssemblywrapper texternalsym:$off))), - (STORE16_I64 texternalsym:$off, I32:$addr, I64:$val)>; + (STORE16_I64 texternalsym:$off, I32:$addr, 0, I64:$val)>; def : Pat<(truncstorei32 I64:$val, (add I32:$addr, (WebAssemblywrapper texternalsym:$off))), - (STORE32_I64 texternalsym:$off, I32:$addr, I64:$val)>; + (STORE32_I64 texternalsym:$off, I32:$addr, 0, I64:$val)>; // Select 
truncating stores with just a constant offset. def : Pat<(truncstorei8 I32:$val, imm:$off), - (STORE8_I32 imm:$off, (CONST_I32 0), I32:$val)>; + (STORE8_I32 imm:$off, (CONST_I32 0), 0, I32:$val)>; def : Pat<(truncstorei16 I32:$val, imm:$off), - (STORE16_I32 imm:$off, (CONST_I32 0), I32:$val)>; + (STORE16_I32 imm:$off, (CONST_I32 0), 0, I32:$val)>; def : Pat<(truncstorei8 I64:$val, imm:$off), - (STORE8_I64 imm:$off, (CONST_I32 0), I64:$val)>; + (STORE8_I64 imm:$off, (CONST_I32 0), 0, I64:$val)>; def : Pat<(truncstorei16 I64:$val, imm:$off), - (STORE16_I64 imm:$off, (CONST_I32 0), I64:$val)>; + (STORE16_I64 imm:$off, (CONST_I32 0), 0, I64:$val)>; def : Pat<(truncstorei32 I64:$val, imm:$off), - (STORE32_I64 imm:$off, (CONST_I32 0), I64:$val)>; + (STORE32_I64 imm:$off, (CONST_I32 0), 0, I64:$val)>; def : Pat<(truncstorei8 I32:$val, (WebAssemblywrapper tglobaladdr:$off)), - (STORE8_I32 tglobaladdr:$off, (CONST_I32 0), I32:$val)>; + (STORE8_I32 tglobaladdr:$off, (CONST_I32 0), 0, I32:$val)>; def : Pat<(truncstorei16 I32:$val, (WebAssemblywrapper tglobaladdr:$off)), - (STORE16_I32 tglobaladdr:$off, (CONST_I32 0), I32:$val)>; + (STORE16_I32 tglobaladdr:$off, (CONST_I32 0), 0, I32:$val)>; def : Pat<(truncstorei8 I64:$val, (WebAssemblywrapper tglobaladdr:$off)), - (STORE8_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>; + (STORE8_I64 tglobaladdr:$off, (CONST_I32 0), 0, I64:$val)>; def : Pat<(truncstorei16 I64:$val, (WebAssemblywrapper tglobaladdr:$off)), - (STORE16_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>; + (STORE16_I64 tglobaladdr:$off, (CONST_I32 0), 0, I64:$val)>; def : Pat<(truncstorei32 I64:$val, (WebAssemblywrapper tglobaladdr:$off)), - (STORE32_I64 tglobaladdr:$off, (CONST_I32 0), I64:$val)>; + (STORE32_I64 tglobaladdr:$off, (CONST_I32 0), 0, I64:$val)>; def : Pat<(truncstorei8 I32:$val, (WebAssemblywrapper texternalsym:$off)), - (STORE8_I32 texternalsym:$off, (CONST_I32 0), I32:$val)>; + (STORE8_I32 texternalsym:$off, (CONST_I32 0), 0, I32:$val)>; def : Pat<(truncstorei16 I32:$val, (WebAssemblywrapper texternalsym:$off)), - (STORE16_I32 texternalsym:$off, (CONST_I32 0), I32:$val)>; + (STORE16_I32 texternalsym:$off, (CONST_I32 0), 0, I32:$val)>; def : Pat<(truncstorei8 I64:$val, (WebAssemblywrapper texternalsym:$off)), - (STORE8_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>; + (STORE8_I64 texternalsym:$off, (CONST_I32 0), 0, I64:$val)>; def : Pat<(truncstorei16 I64:$val, (WebAssemblywrapper texternalsym:$off)), - (STORE16_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>; + (STORE16_I64 texternalsym:$off, (CONST_I32 0), 0, I64:$val)>; def : Pat<(truncstorei32 I64:$val, (WebAssemblywrapper texternalsym:$off)), - (STORE32_I64 texternalsym:$off, (CONST_I32 0), I64:$val)>; + (STORE32_I64 texternalsym:$off, (CONST_I32 0), 0, I64:$val)>; let Defs = [ARGUMENTS] in { -// Memory size. -def MEMORY_SIZE_I32 : I<(outs I32:$dst), (ins), - [(set I32:$dst, (int_wasm_memory_size))], - "memory_size\t$dst">, - Requires<[HasAddr32]>; -def MEMORY_SIZE_I64 : I<(outs I64:$dst), (ins), - [(set I64:$dst, (int_wasm_memory_size))], - "memory_size\t$dst">, - Requires<[HasAddr64]>; +// Current memory size. +def CURRENT_MEMORY_I32 : I<(outs I32:$dst), (ins), + [(set I32:$dst, (int_wasm_current_memory))], + "current_memory\t$dst">, + Requires<[HasAddr32]>; +def CURRENT_MEMORY_I64 : I<(outs I64:$dst), (ins), + [(set I64:$dst, (int_wasm_current_memory))], + "current_memory\t$dst">, + Requires<[HasAddr64]>; // Grow memory. 
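(Aside on the rename above from memory_size to current_memory, with grow_memory kept: a toy model of the semantics the two instructions expose, as I understand the MVP design, where memory size is reported in 64 KiB pages and grow_memory returns the old page count or -1 when the request is refused. Illustration only; the names and limits below are assumptions, not backend code:

    #include <cstdint>
    #include <vector>

    struct LinearMemory {
      static constexpr uint32_t PageSize = 65536; // 64 KiB wasm pages
      static constexpr uint32_t MaxPages = 65536; // 4 GiB cap for 32-bit memory
      std::vector<uint8_t> Bytes;

      // What current_memory reports: the size in pages.
      uint32_t currentMemory() const {
        return uint32_t(Bytes.size() / PageSize);
      }

      // What grow_memory does: enlarge by DeltaPages, returning the old
      // page count, or -1 if the growth is refused. (Toy model; assumes a
      // 64-bit host so the resize below cannot overflow size_t.)
      int32_t growMemory(uint32_t DeltaPages) {
        uint32_t Old = currentMemory();
        if (uint64_t(Old) + DeltaPages > MaxPages)
          return -1;
        Bytes.resize(size_t(uint64_t(Old + DeltaPages) * PageSize));
        return int32_t(Old);
      }
    };
)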
def GROW_MEMORY_I32 : I<(outs), (ins I32:$delta), diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp index b009a4e054c..af53f3db967 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp @@ -16,9 +16,9 @@ //===----------------------------------------------------------------------===// #include "WebAssembly.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" -#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Support/Debug.h" @@ -61,12 +61,12 @@ bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) { auto &MRI = MF.getRegInfo(); for (auto &MBB : MF) { - for (auto MII = MBB.begin(); MII != MBB.end(); ) { + for (auto MII = MBB.begin(); MII != MBB.end();) { MachineInstr *MI = &*MII++; if (MI->getOpcode() != WebAssembly::BR_UNLESS) continue; - unsigned Cond = MI->getOperand(0).getReg(); + unsigned Cond = MI->getOperand(1).getReg(); bool Inverted = false; // Attempt to invert the condition in place. @@ -74,7 +74,7 @@ bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) { assert(MRI.hasOneDef(Cond)); MachineInstr *Def = MRI.getVRegDef(Cond); switch (Def->getOpcode()) { - using namespace WebAssembly; + using namespace WebAssembly; case EQ_I32: Def->setDesc(TII.get(NE_I32)); Inverted = true; break; case NE_I32: Def->setDesc(TII.get(EQ_I32)); Inverted = true; break; case GT_S_I32: Def->setDesc(TII.get(LE_S_I32)); Inverted = true; break; @@ -106,15 +106,10 @@ bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) { // If we weren't able to invert the condition in place. Insert an // expression to invert it. if (!Inverted) { - unsigned ZeroReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); - MFI.stackifyVReg(ZeroReg); - BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::CONST_I32), ZeroReg) - .addImm(0); unsigned Tmp = MRI.createVirtualRegister(&WebAssembly::I32RegClass); MFI.stackifyVReg(Tmp); - BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::EQ_I32), Tmp) - .addReg(Cond) - .addReg(ZeroReg); + BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::EQZ_I32), Tmp) + .addReg(Cond); Cond = Tmp; Inverted = true; } @@ -123,8 +118,8 @@ bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) { // delete the br_unless. assert(Inverted); BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::BR_IF)) - .addReg(Cond) - .addOperand(MI->getOperand(1)); + .addOperand(MI->getOperand(0)) + .addReg(Cond); MBB.erase(MI); } } diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h index 6a60280900a..89f607d84b7 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h @@ -39,18 +39,24 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo { /// - defined and used in LIFO order with other stack registers BitVector VRegStackified; - // One entry for each possible target reg. we expect it to be small. - std::vector<unsigned> PhysRegs; + // A virtual register holding the pointer to the vararg buffer for vararg + // functions. 
It is created and set in TLI::LowerFormalArguments and read by + // TLI::LowerVASTART + unsigned VarargVreg = -1U; -public: - explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) { - PhysRegs.resize(WebAssembly::NUM_TARGET_REGS, -1U); - } + public: + explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {} ~WebAssemblyFunctionInfo() override; void addParam(MVT VT) { Params.push_back(VT); } const std::vector<MVT> &getParams() const { return Params; } + unsigned getVarargBufferVreg() const { + assert(VarargVreg != -1U && "Vararg vreg hasn't been set"); + return VarargVreg; + } + void setVarargBufferVreg(unsigned Reg) { VarargVreg = Reg; } + static const unsigned UnusedReg = -1u; void stackifyVReg(unsigned VReg) { @@ -71,25 +77,15 @@ public: WARegs[TargetRegisterInfo::virtReg2Index(VReg)] = WAReg; } unsigned getWAReg(unsigned Reg) const { - if (TargetRegisterInfo::isVirtualRegister(Reg)) { - assert(TargetRegisterInfo::virtReg2Index(Reg) < WARegs.size()); - return WARegs[TargetRegisterInfo::virtReg2Index(Reg)]; - } - return PhysRegs[Reg]; - } - // If new virtual registers are created after initWARegs has been called, - // this function can be used to add WebAssembly register mappings for them. - void addWAReg(unsigned VReg, unsigned WAReg) { - assert(VReg = WARegs.size()); - WARegs.push_back(WAReg); + assert(TargetRegisterInfo::virtReg2Index(Reg) < WARegs.size()); + return WARegs[TargetRegisterInfo::virtReg2Index(Reg)]; } - void addPReg(unsigned PReg, unsigned WAReg) { - assert(PReg < WebAssembly::NUM_TARGET_REGS); - assert(WAReg < -1U); - PhysRegs[PReg] = WAReg; + // For a given stackified WAReg, return the id number to print with push/pop. + static unsigned getWARegStackId(unsigned Reg) { + assert(Reg & INT32_MIN); + return Reg & INT32_MAX; } - const std::vector<unsigned> &getPhysRegs() const { return PhysRegs; } }; } // end namespace llvm diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp new file mode 100644 index 00000000000..473de7ddae7 --- /dev/null +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp @@ -0,0 +1,105 @@ +//===--- WebAssemblyOptimizeLiveIntervals.cpp - LiveInterval processing ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Optimize LiveIntervals for use in a post-RA context. +// +/// LiveIntervals normally runs before register allocation when the code is +/// only recently lowered out of SSA form, so it's uncommon for registers to +/// have multiple defs, and then they do, the defs are usually closely related. +/// Later, after coalescing, tail duplication, and other optimizations, it's +/// more common to see registers with multiple unrelated defs. This pass +/// updates LiveIntervalAnalysis to distribute the value numbers across separate +/// LiveIntervals. 
+/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "WebAssemblySubtarget.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-optimize-live-intervals" + +namespace { +class WebAssemblyOptimizeLiveIntervals final : public MachineFunctionPass { + const char *getPassName() const override { + return "WebAssembly Optimize Live Intervals"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<LiveIntervals>(); + AU.addPreserved<MachineBlockFrequencyInfo>(); + AU.addPreserved<SlotIndexes>(); + AU.addPreserved<LiveIntervals>(); + AU.addPreservedID(LiveVariablesID); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyOptimizeLiveIntervals() : MachineFunctionPass(ID) {} +}; +} // end anonymous namespace + +char WebAssemblyOptimizeLiveIntervals::ID = 0; +FunctionPass *llvm::createWebAssemblyOptimizeLiveIntervals() { + return new WebAssemblyOptimizeLiveIntervals(); +} + +bool WebAssemblyOptimizeLiveIntervals::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "********** Optimize LiveIntervals **********\n" + "********** Function: " + << MF.getName() << '\n'); + + MachineRegisterInfo &MRI = MF.getRegInfo(); + LiveIntervals &LIS = getAnalysis<LiveIntervals>(); + + // We don't preserve SSA form. + MRI.leaveSSA(); + + assert(MRI.tracksLiveness() && + "OptimizeLiveIntervals expects liveness"); + + // Split multiple-VN LiveIntervals into multiple LiveIntervals. + SmallVector<LiveInterval*, 4> SplitLIs; + for (unsigned i = 0, e = MRI.getNumVirtRegs(); i < e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (MRI.reg_nodbg_empty(Reg)) + continue; + + LIS.splitSeparateComponents(LIS.getInterval(Reg), SplitLIs); + SplitLIs.clear(); + } + + // In PrepareForLiveIntervals, we conservatively inserted IMPLICIT_DEF + // instructions to satisfy LiveIntervals' requirement that all uses be + // dominated by defs. Now that LiveIntervals has computed which of these + // defs are actually needed and which are dead, remove the dead ones. 
+ for (auto MII = MF.begin()->begin(), MIE = MF.begin()->end(); MII != MIE; ) { + MachineInstr *MI = &*MII++; + if (MI->isImplicitDef() && MI->getOperand(0).isDead()) { + LiveInterval &LI = LIS.getInterval(MI->getOperand(0).getReg()); + LIS.removeVRegDefAt(LI, LIS.getInstructionIndex(*MI).getRegSlot()); + LIS.RemoveMachineInstrFromMaps(*MI); + MI->eraseFromParent(); + } + } + + return false; +} diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp index 4ad6eed7385..56d44e6466e 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp @@ -12,14 +12,23 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssembly.h" #include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" using namespace llvm; #define DEBUG_TYPE "wasm-peephole" +static cl::opt<bool> DisableWebAssemblyFallthroughReturnOpt( + "disable-wasm-fallthrough-return-opt", cl::Hidden, + cl::desc("WebAssembly: Disable fallthrough-return optimizations."), + cl::init(false)); + namespace { class WebAssemblyPeephole final : public MachineFunctionPass { const char *getPassName() const override { @@ -28,6 +37,7 @@ class WebAssemblyPeephole final : public MachineFunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -44,11 +54,65 @@ FunctionPass *llvm::createWebAssemblyPeephole() { return new WebAssemblyPeephole(); } -bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) { +/// If desirable, rewrite NewReg to a drop register. +static bool MaybeRewriteToDrop(unsigned OldReg, unsigned NewReg, + MachineOperand &MO, WebAssemblyFunctionInfo &MFI, + MachineRegisterInfo &MRI) { bool Changed = false; + if (OldReg == NewReg) { + Changed = true; + unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); + MO.setReg(NewReg); + MO.setIsDead(); + MFI.stackifyVReg(NewReg); + } + return Changed; +} + +static bool MaybeRewriteToFallthrough(MachineInstr &MI, MachineBasicBlock &MBB, + const MachineFunction &MF, + WebAssemblyFunctionInfo &MFI, + MachineRegisterInfo &MRI, + const WebAssemblyInstrInfo &TII, + unsigned FallthroughOpc, + unsigned CopyLocalOpc) { + if (DisableWebAssemblyFallthroughReturnOpt) + return false; + if (&MBB != &MF.back()) + return false; + if (&MI != &MBB.back()) + return false; + + // If the operand isn't stackified, insert a COPY_LOCAL to read the operand + // and stackify it. + MachineOperand &MO = MI.getOperand(0); + unsigned Reg = MO.getReg(); + if (!MFI.isVRegStackified(Reg)) { + unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg)); + BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(CopyLocalOpc), NewReg) + .addReg(Reg); + MO.setReg(NewReg); + MFI.stackifyVReg(NewReg); + } + + // Rewrite the return. 
+ MI.setDesc(TII.get(FallthroughOpc)); + return true; +} + +bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) { + DEBUG({ + dbgs() << "********** Peephole **********\n" + << "********** Function: " << MF.getName() << '\n'; + }); MachineRegisterInfo &MRI = MF.getRegInfo(); WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); + const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + const WebAssemblyTargetLowering &TLI = + *MF.getSubtarget<WebAssemblySubtarget>().getTargetLowering(); + auto &LibInfo = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + bool Changed = false; for (auto &MBB : MF) for (auto &MI : MBB) @@ -66,20 +130,67 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) { case WebAssembly::STORE_I64: { // Store instructions return their value operand. If we ended up using // the same register for both, replace it with a dead def so that it - // can use $discard instead. + // can use $drop instead. MachineOperand &MO = MI.getOperand(0); unsigned OldReg = MO.getReg(); - // TODO: Handle SP/physregs - if (OldReg == MI.getOperand(3).getReg() - && TargetRegisterInfo::isVirtualRegister(MI.getOperand(3).getReg())) { - Changed = true; - unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); - MO.setReg(NewReg); - MO.setIsDead(); - MFI.stackifyVReg(NewReg); - MFI.addWAReg(NewReg, WebAssemblyFunctionInfo::UnusedReg); + unsigned NewReg = + MI.getOperand(WebAssembly::StoreValueOperandNo).getReg(); + Changed |= MaybeRewriteToDrop(OldReg, NewReg, MO, MFI, MRI); + break; + } + case WebAssembly::CALL_I32: + case WebAssembly::CALL_I64: { + MachineOperand &Op1 = MI.getOperand(1); + if (Op1.isSymbol()) { + StringRef Name(Op1.getSymbolName()); + if (Name == TLI.getLibcallName(RTLIB::MEMCPY) || + Name == TLI.getLibcallName(RTLIB::MEMMOVE) || + Name == TLI.getLibcallName(RTLIB::MEMSET)) { + LibFunc::Func Func; + if (LibInfo.getLibFunc(Name, Func)) { + const auto &Op2 = MI.getOperand(2); + if (!Op2.isReg()) + report_fatal_error("Peephole: call to builtin function with " + "wrong signature, not consuming reg"); + MachineOperand &MO = MI.getOperand(0); + unsigned OldReg = MO.getReg(); + unsigned NewReg = Op2.getReg(); + + if (MRI.getRegClass(NewReg) != MRI.getRegClass(OldReg)) + report_fatal_error("Peephole: call to builtin function with " + "wrong signature, from/to mismatch"); + Changed |= MaybeRewriteToDrop(OldReg, NewReg, MO, MFI, MRI); + } + } } + break; } + // Optimize away an explicit void return at the end of the function. 
+ case WebAssembly::RETURN_I32: + Changed |= MaybeRewriteToFallthrough( + MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_I32, + WebAssembly::COPY_LOCAL_I32); + break; + case WebAssembly::RETURN_I64: + Changed |= MaybeRewriteToFallthrough( + MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_I64, + WebAssembly::COPY_LOCAL_I64); + break; + case WebAssembly::RETURN_F32: + Changed |= MaybeRewriteToFallthrough( + MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_F32, + WebAssembly::COPY_LOCAL_F32); + break; + case WebAssembly::RETURN_F64: + Changed |= MaybeRewriteToFallthrough( + MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_F64, + WebAssembly::COPY_LOCAL_F64); + break; + case WebAssembly::RETURN_VOID: + if (!DisableWebAssemblyFallthroughReturnOpt && + &MBB == &MF.back() && &MI == &MBB.back()) + MI.setDesc(TII.get(WebAssembly::FALLTHROUGH_RETURN_VOID)); + break; } return Changed; diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp new file mode 100644 index 00000000000..30444ac598a --- /dev/null +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp @@ -0,0 +1,136 @@ +//===- WebAssemblyPrepareForLiveIntervals.cpp - Prepare for LiveIntervals -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Fix up code to meet LiveInterval's requirements. +/// +/// Some CodeGen passes don't preserve LiveInterval's requirements, because +/// they run after register allocation and it isn't important. However, +/// WebAssembly runs LiveIntervals in a late pass. This pass transforms code +/// to meet LiveIntervals' requirements; primarily, it ensures that all +/// virtual register uses have definitions (IMPLICIT_DEF definitions if +/// nothing else). +/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-prepare-for-live-intervals" + +namespace { +class WebAssemblyPrepareForLiveIntervals final : public MachineFunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyPrepareForLiveIntervals() : MachineFunctionPass(ID) {} + +private: + const char *getPassName() const override { + return "WebAssembly Prepare For LiveIntervals"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // end anonymous namespace + +char WebAssemblyPrepareForLiveIntervals::ID = 0; +FunctionPass *llvm::createWebAssemblyPrepareForLiveIntervals() { + return new WebAssemblyPrepareForLiveIntervals(); +} + +/// Test whether the given instruction is an ARGUMENT. 
+static bool IsArgument(const MachineInstr *MI) { + switch (MI->getOpcode()) { + case WebAssembly::ARGUMENT_I32: + case WebAssembly::ARGUMENT_I64: + case WebAssembly::ARGUMENT_F32: + case WebAssembly::ARGUMENT_F64: + return true; + default: + return false; + } +} + +// Test whether the given register has an ARGUMENT def. +static bool HasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) { + for (auto &Def : MRI.def_instructions(Reg)) + if (IsArgument(&Def)) + return true; + return false; +} + +bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction(MachineFunction &MF) { + DEBUG({ + dbgs() << "********** Prepare For LiveIntervals **********\n" + << "********** Function: " << MF.getName() << '\n'; + }); + + bool Changed = false; + MachineRegisterInfo &MRI = MF.getRegInfo(); + const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + MachineBasicBlock &Entry = *MF.begin(); + + assert(!mustPreserveAnalysisID(LiveIntervalsID) && + "LiveIntervals shouldn't be active yet!"); + + // We don't preserve SSA form. + MRI.leaveSSA(); + + // BranchFolding and perhaps other passes don't preserve IMPLICIT_DEF + // instructions. LiveIntervals requires that all paths to virtual register + // uses provide a definition. Insert IMPLICIT_DEFs in the entry block to + // conservatively satisfy this. + // + // TODO: This is fairly heavy-handed; find a better approach. + // + for (unsigned i = 0, e = MRI.getNumVirtRegs(); i < e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + + // Skip unused registers. + if (MRI.use_nodbg_empty(Reg)) + continue; + + // Skip registers that have an ARGUMENT definition. + if (HasArgumentDef(Reg, MRI)) + continue; + + BuildMI(Entry, Entry.begin(), DebugLoc(), + TII.get(WebAssembly::IMPLICIT_DEF), Reg); + Changed = true; + } + + // Move ARGUMENT_* instructions to the top of the entry block, so that their + // liveness reflects the fact that these really are live-in values. + for (auto MII = Entry.begin(), MIE = Entry.end(); MII != MIE; ) { + MachineInstr *MI = &*MII++; + if (IsArgument(MI)) { + MI->removeFromParent(); + Entry.insert(Entry.begin(), MI); + } + } + + // Ok, we're now ready to run LiveIntervalAnalysis again. + MF.getProperties().set(MachineFunctionProperties::Property::TracksLiveness); + + return Changed; +} diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp index 9ec66595d8d..dedd9108dfd 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp @@ -66,7 +66,7 @@ static float computeWeight(const MachineRegisterInfo *MRI, float weight = 0.0f; for (MachineOperand &MO : MRI->reg_nodbg_operands(VReg)) weight += LiveIntervals::getSpillWeight(MO.isDef(), MO.isUse(), MBFI, - MO.getParent()); + *MO.getParent()); return weight; } @@ -99,7 +99,7 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) { unsigned VReg = TargetRegisterInfo::index2VirtReg(i); if (MFI.isVRegStackified(VReg)) continue; - // Skip unused registers, which can use $discard. + // Skip unused registers, which can use $drop. 
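(Aside summarizing the new PrepareForLiveIntervals pass shown above: because LiveIntervals is rerun unusually late here, the pass conservatively gives every virtual register that has uses but no ARGUMENT definition an IMPLICIT_DEF in the entry block, and hoists ARGUMENT_* instructions to the top of entry so their liveness matches true live-in values. A condensed model of the first rule, illustration only:

    // A register needs a synthetic entry-block def exactly when it is used
    // somewhere but is not defined by an ARGUMENT instruction.
    struct VRegInfo { bool HasUses; bool HasArgumentDef; };

    static bool needsEntryImplicitDef(const VRegInfo &V) {
      return V.HasUses && !V.HasArgumentDef;
    }
)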
if (MRI->use_empty(VReg)) continue; diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp index f621db070b5..4a8fd96f832 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp @@ -18,8 +18,8 @@ #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" #include "llvm/ADT/SCCIterator.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -61,7 +61,6 @@ bool WebAssemblyRegNumbering::runOnMachineFunction(MachineFunction &MF) { WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); MachineRegisterInfo &MRI = MF.getRegInfo(); - const MachineFrameInfo &FrameInfo = *MF.getFrameInfo(); MFI.initWARegs(); @@ -73,9 +72,13 @@ bool WebAssemblyRegNumbering::runOnMachineFunction(MachineFunction &MF) { case WebAssembly::ARGUMENT_I32: case WebAssembly::ARGUMENT_I64: case WebAssembly::ARGUMENT_F32: - case WebAssembly::ARGUMENT_F64: - MFI.setWAReg(MI.getOperand(0).getReg(), MI.getOperand(1).getImm()); + case WebAssembly::ARGUMENT_F64: { + int64_t Imm = MI.getOperand(1).getImm(); + DEBUG(dbgs() << "Arg VReg " << MI.getOperand(0).getReg() << " -> WAReg " + << Imm << "\n"); + MFI.setWAReg(MI.getOperand(0).getReg(), Imm); break; + } default: break; } @@ -84,26 +87,27 @@ bool WebAssemblyRegNumbering::runOnMachineFunction(MachineFunction &MF) { // Then assign regular WebAssembly registers for all remaining used // virtual registers. TODO: Consider sorting the registers by frequency of // use, to maximize usage of small immediate fields. - unsigned NumArgRegs = MFI.getParams().size(); unsigned NumVRegs = MF.getRegInfo().getNumVirtRegs(); unsigned NumStackRegs = 0; - unsigned CurReg = 0; + // Start the numbering for locals after the arg regs + unsigned CurReg = MFI.getParams().size(); for (unsigned VRegIdx = 0; VRegIdx < NumVRegs; ++VRegIdx) { unsigned VReg = TargetRegisterInfo::index2VirtReg(VRegIdx); + // Skip unused registers. + if (MRI.use_empty(VReg)) + continue; // Handle stackified registers. if (MFI.isVRegStackified(VReg)) { + DEBUG(dbgs() << "VReg " << VReg << " -> WAReg " + << (INT32_MIN | NumStackRegs) << "\n"); MFI.setWAReg(VReg, INT32_MIN | NumStackRegs++); continue; } - // Skip unused registers. 
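(Aside on the register-numbering scheme visible above and in getWARegStackId() earlier: arguments keep their parameter index, ordinary locals are numbered starting after the parameters, and stackified values are tagged by setting the sign bit, which is stripped back off for printing. A small sketch of that encoding, with illustrative helper names rather than the in-tree ones:

    #include <cassert>
    #include <cstdint>

    // Stackified values get INT32_MIN | id; everything else is a plain
    // local index. The printer masks with INT32_MAX to recover the id.
    static uint32_t encodeStackReg(uint32_t StackId) {
      return UINT32_C(0x80000000) | StackId;
    }
    static bool isStackReg(uint32_t WAReg) {
      return (WAReg & UINT32_C(0x80000000)) != 0;
    }
    static uint32_t stackId(uint32_t WAReg) {
      assert(isStackReg(WAReg));
      return WAReg & UINT32_C(0x7fffffff);
    }
)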
- if (MRI.use_empty(VReg)) - continue; - if (MFI.getWAReg(VReg) == WebAssemblyFunctionInfo::UnusedReg) - MFI.setWAReg(VReg, NumArgRegs + CurReg++); + if (MFI.getWAReg(VReg) == WebAssemblyFunctionInfo::UnusedReg) { + DEBUG(dbgs() << "VReg " << VReg << " -> WAReg " << CurReg << "\n"); + MFI.setWAReg(VReg, CurReg++); + } } - // Allocate locals for used physical registers - if (FrameInfo.getStackSize() > 0) - MFI.addPReg(WebAssembly::SP32, CurReg++); return true; } diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index 537c147e614..0aa3b621da3 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -23,9 +23,12 @@ #include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_* #include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" @@ -43,12 +46,13 @@ class WebAssemblyRegStackify final : public MachineFunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); AU.addRequired<AAResultsWrapperPass>(); + AU.addRequired<MachineDominatorTree>(); AU.addRequired<LiveIntervals>(); AU.addPreserved<MachineBlockFrequencyInfo>(); AU.addPreserved<SlotIndexes>(); AU.addPreserved<LiveIntervals>(); - AU.addPreservedID(MachineDominatorsID); AU.addPreservedID(LiveVariablesID); + AU.addPreserved<MachineDominatorTree>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -82,17 +86,197 @@ static void ImposeStackOrdering(MachineInstr *MI) { /*isImp=*/true)); } +// Determine whether a call to the callee referenced by +// MI->getOperand(CalleeOpNo) reads memory, writes memory, and/or has side +// effects. +static void QueryCallee(const MachineInstr &MI, unsigned CalleeOpNo, bool &Read, + bool &Write, bool &Effects, bool &StackPointer) { + // All calls can use the stack pointer. + StackPointer = true; + + const MachineOperand &MO = MI.getOperand(CalleeOpNo); + if (MO.isGlobal()) { + const Constant *GV = MO.getGlobal(); + if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) + if (!GA->isInterposable()) + GV = GA->getAliasee(); + + if (const Function *F = dyn_cast<Function>(GV)) { + if (!F->doesNotThrow()) + Effects = true; + if (F->doesNotAccessMemory()) + return; + if (F->onlyReadsMemory()) { + Read = true; + return; + } + } + } + + // Assume the worst. + Write = true; + Read = true; + Effects = true; +} + +// Determine whether MI reads memory, writes memory, has side effects, +// and/or uses the __stack_pointer value. +static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read, + bool &Write, bool &Effects, bool &StackPointer) { + assert(!MI.isPosition()); + assert(!MI.isTerminator()); + + if (MI.isDebugValue()) + return; + + // Check for loads. + if (MI.mayLoad() && !MI.isInvariantLoad(&AA)) + Read = true; + + // Check for stores. + if (MI.mayStore()) { + Write = true; + + // Check for stores to __stack_pointer. 
+ for (auto MMO : MI.memoperands()) { + const MachinePointerInfo &MPI = MMO->getPointerInfo(); + if (MPI.V.is<const PseudoSourceValue *>()) { + auto PSV = MPI.V.get<const PseudoSourceValue *>(); + if (const ExternalSymbolPseudoSourceValue *EPSV = + dyn_cast<ExternalSymbolPseudoSourceValue>(PSV)) + if (StringRef(EPSV->getSymbol()) == "__stack_pointer") + StackPointer = true; + } + } + } else if (MI.hasOrderedMemoryRef()) { + switch (MI.getOpcode()) { + case WebAssembly::DIV_S_I32: case WebAssembly::DIV_S_I64: + case WebAssembly::REM_S_I32: case WebAssembly::REM_S_I64: + case WebAssembly::DIV_U_I32: case WebAssembly::DIV_U_I64: + case WebAssembly::REM_U_I32: case WebAssembly::REM_U_I64: + case WebAssembly::I32_TRUNC_S_F32: case WebAssembly::I64_TRUNC_S_F32: + case WebAssembly::I32_TRUNC_S_F64: case WebAssembly::I64_TRUNC_S_F64: + case WebAssembly::I32_TRUNC_U_F32: case WebAssembly::I64_TRUNC_U_F32: + case WebAssembly::I32_TRUNC_U_F64: case WebAssembly::I64_TRUNC_U_F64: + // These instruction have hasUnmodeledSideEffects() returning true + // because they trap on overflow and invalid so they can't be arbitrarily + // moved, however hasOrderedMemoryRef() interprets this plus their lack + // of memoperands as having a potential unknown memory reference. + break; + default: + // Record volatile accesses, unless it's a call, as calls are handled + // specially below. + if (!MI.isCall()) { + Write = true; + Effects = true; + } + break; + } + } + + // Check for side effects. + if (MI.hasUnmodeledSideEffects()) { + switch (MI.getOpcode()) { + case WebAssembly::DIV_S_I32: case WebAssembly::DIV_S_I64: + case WebAssembly::REM_S_I32: case WebAssembly::REM_S_I64: + case WebAssembly::DIV_U_I32: case WebAssembly::DIV_U_I64: + case WebAssembly::REM_U_I32: case WebAssembly::REM_U_I64: + case WebAssembly::I32_TRUNC_S_F32: case WebAssembly::I64_TRUNC_S_F32: + case WebAssembly::I32_TRUNC_S_F64: case WebAssembly::I64_TRUNC_S_F64: + case WebAssembly::I32_TRUNC_U_F32: case WebAssembly::I64_TRUNC_U_F32: + case WebAssembly::I32_TRUNC_U_F64: case WebAssembly::I64_TRUNC_U_F64: + // These instructions have hasUnmodeledSideEffects() returning true + // because they trap on overflow and invalid so they can't be arbitrarily + // moved, however in the specific case of register stackifying, it is safe + // to move them because overflow and invalid are Undefined Behavior. + break; + default: + Effects = true; + break; + } + } + + // Analyze calls. + if (MI.isCall()) { + switch (MI.getOpcode()) { + case WebAssembly::CALL_VOID: + case WebAssembly::CALL_INDIRECT_VOID: + QueryCallee(MI, 0, Read, Write, Effects, StackPointer); + break; + case WebAssembly::CALL_I32: case WebAssembly::CALL_I64: + case WebAssembly::CALL_F32: case WebAssembly::CALL_F64: + case WebAssembly::CALL_INDIRECT_I32: case WebAssembly::CALL_INDIRECT_I64: + case WebAssembly::CALL_INDIRECT_F32: case WebAssembly::CALL_INDIRECT_F64: + QueryCallee(MI, 1, Read, Write, Effects, StackPointer); + break; + default: + llvm_unreachable("unexpected call opcode"); + } + } +} + +// Test whether Def is safe and profitable to rematerialize. +static bool ShouldRematerialize(const MachineInstr &Def, AliasAnalysis &AA, + const WebAssemblyInstrInfo *TII) { + return Def.isAsCheapAsAMove() && TII->isTriviallyReMaterializable(Def, &AA); +} + +// Identify the definition for this register at this point. This is a +// generalization of MachineRegisterInfo::getUniqueVRegDef that uses +// LiveIntervals to handle complex cases. 
+static MachineInstr *GetVRegDef(unsigned Reg, const MachineInstr *Insert, + const MachineRegisterInfo &MRI, + const LiveIntervals &LIS) +{ + // Most registers are in SSA form here so we try a quick MRI query first. + if (MachineInstr *Def = MRI.getUniqueVRegDef(Reg)) + return Def; + + // MRI doesn't know what the Def is. Try asking LIS. + if (const VNInfo *ValNo = LIS.getInterval(Reg).getVNInfoBefore( + LIS.getInstructionIndex(*Insert))) + return LIS.getInstructionFromIndex(ValNo->def); + + return nullptr; +} + +// Test whether Reg, as defined at Def, has exactly one use. This is a +// generalization of MachineRegisterInfo::hasOneUse that uses LiveIntervals +// to handle complex cases. +static bool HasOneUse(unsigned Reg, MachineInstr *Def, + MachineRegisterInfo &MRI, MachineDominatorTree &MDT, + LiveIntervals &LIS) { + // Most registers are in SSA form here so we try a quick MRI query first. + if (MRI.hasOneUse(Reg)) + return true; + + bool HasOne = false; + const LiveInterval &LI = LIS.getInterval(Reg); + const VNInfo *DefVNI = LI.getVNInfoAt( + LIS.getInstructionIndex(*Def).getRegSlot()); + assert(DefVNI); + for (auto I : MRI.use_nodbg_operands(Reg)) { + const auto &Result = LI.Query(LIS.getInstructionIndex(*I.getParent())); + if (Result.valueIn() == DefVNI) { + if (!Result.isKill()) + return false; + if (HasOne) + return false; + HasOne = true; + } + } + return HasOne; +} + // Test whether it's safe to move Def to just before Insert. // TODO: Compute memory dependencies in a way that doesn't require always // walking the block. // TODO: Compute memory dependencies in a way that uses AliasAnalysis to be // more precise. static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert, - AliasAnalysis &AA, LiveIntervals &LIS, - MachineRegisterInfo &MRI) { + AliasAnalysis &AA, const LiveIntervals &LIS, + const MachineRegisterInfo &MRI) { assert(Def->getParent() == Insert->getParent()); - bool SawStore = false, SawSideEffects = false; - MachineBasicBlock::const_iterator D(Def), I(Insert); // Check for register dependencies. for (const MachineOperand &MO : Def->operands()) { @@ -106,6 +290,10 @@ static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert, continue; if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + // Ignore ARGUMENTS; it's just used to keep the ARGUMENT_* instructions + // from moving down, and we've already checked for that. + if (Reg == WebAssembly::ARGUMENTS) + continue; // If the physical register is never modified, ignore it. if (!MRI.isPhysRegModified(Reg)) continue; @@ -114,24 +302,404 @@ static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert, } // Ask LiveIntervals whether moving this virtual register use or def to - // Insert will change value numbers are seen. + // Insert will change which value numbers are seen. + // + // If the operand is a use of a register that is also defined in the same + // instruction, test that the newly defined value reaches the insert point, + // since the operand will be moving along with the def. const LiveInterval &LI = LIS.getInterval(Reg); - VNInfo *DefVNI = MO.isDef() ? - LI.getVNInfoAt(LIS.getInstructionIndex(Def).getRegSlot()) : - LI.getVNInfoBefore(LIS.getInstructionIndex(Def)); + VNInfo *DefVNI = + (MO.isDef() || Def->definesRegister(Reg)) ? 
+ LI.getVNInfoAt(LIS.getInstructionIndex(*Def).getRegSlot()) : + LI.getVNInfoBefore(LIS.getInstructionIndex(*Def)); assert(DefVNI && "Instruction input missing value number"); - VNInfo *InsVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(Insert)); + VNInfo *InsVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*Insert)); if (InsVNI && DefVNI != InsVNI) return false; } - // Check for memory dependencies and side effects. - for (--I; I != D; --I) - SawSideEffects |= I->isSafeToMove(&AA, SawStore); - return !(SawStore && Def->mayLoad() && !Def->isInvariantLoad(&AA)) && - !(SawSideEffects && !Def->isSafeToMove(&AA, SawStore)); + bool Read = false, Write = false, Effects = false, StackPointer = false; + Query(*Def, AA, Read, Write, Effects, StackPointer); + + // If the instruction does not access memory and has no side effects, it has + // no additional dependencies. + if (!Read && !Write && !Effects && !StackPointer) + return true; + + // Scan through the intervening instructions between Def and Insert. + MachineBasicBlock::const_iterator D(Def), I(Insert); + for (--I; I != D; --I) { + bool InterveningRead = false; + bool InterveningWrite = false; + bool InterveningEffects = false; + bool InterveningStackPointer = false; + Query(*I, AA, InterveningRead, InterveningWrite, InterveningEffects, + InterveningStackPointer); + if (Effects && InterveningEffects) + return false; + if (Read && InterveningWrite) + return false; + if (Write && (InterveningRead || InterveningWrite)) + return false; + if (StackPointer && InterveningStackPointer) + return false; + } + + return true; +} + +/// Test whether OneUse, a use of Reg, dominates all of Reg's other uses. +static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse, + const MachineBasicBlock &MBB, + const MachineRegisterInfo &MRI, + const MachineDominatorTree &MDT, + LiveIntervals &LIS, + WebAssemblyFunctionInfo &MFI) { + const LiveInterval &LI = LIS.getInterval(Reg); + + const MachineInstr *OneUseInst = OneUse.getParent(); + VNInfo *OneUseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*OneUseInst)); + + for (const MachineOperand &Use : MRI.use_operands(Reg)) { + if (&Use == &OneUse) + continue; + + const MachineInstr *UseInst = Use.getParent(); + VNInfo *UseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*UseInst)); + + if (UseVNI != OneUseVNI) + continue; + + const MachineInstr *OneUseInst = OneUse.getParent(); + if (UseInst == OneUseInst) { + // Another use in the same instruction. We need to ensure that the one + // selected use happens "before" it. + if (&OneUse > &Use) + return false; + } else { + // Test that the use is dominated by the one selected use. + while (!MDT.dominates(OneUseInst, UseInst)) { + // Actually, dominating is over-conservative. Test that the use would + // happen after the one selected use in the stack evaluation order. + // + // This is needed as a consequence of using implicit get_locals for + // uses and implicit set_locals for defs. 
+ if (UseInst->getDesc().getNumDefs() == 0) + return false; + const MachineOperand &MO = UseInst->getOperand(0); + if (!MO.isReg()) + return false; + unsigned DefReg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(DefReg) || + !MFI.isVRegStackified(DefReg)) + return false; + assert(MRI.hasOneUse(DefReg)); + const MachineOperand &NewUse = *MRI.use_begin(DefReg); + const MachineInstr *NewUseInst = NewUse.getParent(); + if (NewUseInst == OneUseInst) { + if (&OneUse > &NewUse) + return false; + break; + } + UseInst = NewUseInst; + } + } + } + return true; +} + +/// Get the appropriate tee_local opcode for the given register class. +static unsigned GetTeeLocalOpcode(const TargetRegisterClass *RC) { + if (RC == &WebAssembly::I32RegClass) + return WebAssembly::TEE_LOCAL_I32; + if (RC == &WebAssembly::I64RegClass) + return WebAssembly::TEE_LOCAL_I64; + if (RC == &WebAssembly::F32RegClass) + return WebAssembly::TEE_LOCAL_F32; + if (RC == &WebAssembly::F64RegClass) + return WebAssembly::TEE_LOCAL_F64; + llvm_unreachable("Unexpected register class"); +} + +// Shrink LI to its uses, cleaning up LI. +static void ShrinkToUses(LiveInterval &LI, LiveIntervals &LIS) { + if (LIS.shrinkToUses(&LI)) { + SmallVector<LiveInterval*, 4> SplitLIs; + LIS.splitSeparateComponents(LI, SplitLIs); + } +} + +/// A single-use def in the same block with no intervening memory or register +/// dependencies; move the def down and nest it with the current instruction. +static MachineInstr *MoveForSingleUse(unsigned Reg, MachineOperand& Op, + MachineInstr *Def, + MachineBasicBlock &MBB, + MachineInstr *Insert, LiveIntervals &LIS, + WebAssemblyFunctionInfo &MFI, + MachineRegisterInfo &MRI) { + DEBUG(dbgs() << "Move for single use: "; Def->dump()); + + MBB.splice(Insert, &MBB, Def); + LIS.handleMove(*Def); + + if (MRI.hasOneDef(Reg) && MRI.hasOneUse(Reg)) { + // No one else is using this register for anything so we can just stackify + // it in place. + MFI.stackifyVReg(Reg); + } else { + // The register may have unrelated uses or defs; create a new register for + // just our one def and use so that we can stackify it. + unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg)); + Def->getOperand(0).setReg(NewReg); + Op.setReg(NewReg); + + // Tell LiveIntervals about the new register. + LIS.createAndComputeVirtRegInterval(NewReg); + + // Tell LiveIntervals about the changes to the old register. + LiveInterval &LI = LIS.getInterval(Reg); + LI.removeSegment(LIS.getInstructionIndex(*Def).getRegSlot(), + LIS.getInstructionIndex(*Op.getParent()).getRegSlot(), + /*RemoveDeadValNo=*/true); + + MFI.stackifyVReg(NewReg); + + DEBUG(dbgs() << " - Replaced register: "; Def->dump()); + } + + ImposeStackOrdering(Def); + return Def; +} + +/// A trivially cloneable instruction; clone it and nest the new copy with the +/// current instruction. 
+static MachineInstr *RematerializeCheapDef( + unsigned Reg, MachineOperand &Op, MachineInstr &Def, MachineBasicBlock &MBB, + MachineBasicBlock::instr_iterator Insert, LiveIntervals &LIS, + WebAssemblyFunctionInfo &MFI, MachineRegisterInfo &MRI, + const WebAssemblyInstrInfo *TII, const WebAssemblyRegisterInfo *TRI) { + DEBUG(dbgs() << "Rematerializing cheap def: "; Def.dump()); + DEBUG(dbgs() << " - for use in "; Op.getParent()->dump()); + + unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg)); + TII->reMaterialize(MBB, Insert, NewReg, 0, Def, *TRI); + Op.setReg(NewReg); + MachineInstr *Clone = &*std::prev(Insert); + LIS.InsertMachineInstrInMaps(*Clone); + LIS.createAndComputeVirtRegInterval(NewReg); + MFI.stackifyVReg(NewReg); + ImposeStackOrdering(Clone); + + DEBUG(dbgs() << " - Cloned to "; Clone->dump()); + + // Shrink the interval. + bool IsDead = MRI.use_empty(Reg); + if (!IsDead) { + LiveInterval &LI = LIS.getInterval(Reg); + ShrinkToUses(LI, LIS); + IsDead = !LI.liveAt(LIS.getInstructionIndex(Def).getDeadSlot()); + } + + // If that was the last use of the original, delete the original. + if (IsDead) { + DEBUG(dbgs() << " - Deleting original\n"); + SlotIndex Idx = LIS.getInstructionIndex(Def).getRegSlot(); + LIS.removePhysRegDefAt(WebAssembly::ARGUMENTS, Idx); + LIS.removeInterval(Reg); + LIS.RemoveMachineInstrFromMaps(Def); + Def.eraseFromParent(); + } + + return Clone; } +/// A multiple-use def in the same block with no intervening memory or register +/// dependencies; move the def down, nest it with the current instruction, and +/// insert a tee_local to satisfy the rest of the uses. As an illustration, +/// rewrite this: +/// +/// Reg = INST ... // Def +/// INST ..., Reg, ... // Insert +/// INST ..., Reg, ... +/// INST ..., Reg, ... +/// +/// to this: +/// +/// DefReg = INST ... // Def (to become the new Insert) +/// TeeReg, Reg = TEE_LOCAL_... DefReg +/// INST ..., TeeReg, ... // Insert +/// INST ..., Reg, ... +/// INST ..., Reg, ... +/// +/// with DefReg and TeeReg stackified. This eliminates a get_local from the +/// resulting code. +static MachineInstr *MoveAndTeeForMultiUse( + unsigned Reg, MachineOperand &Op, MachineInstr *Def, MachineBasicBlock &MBB, + MachineInstr *Insert, LiveIntervals &LIS, WebAssemblyFunctionInfo &MFI, + MachineRegisterInfo &MRI, const WebAssemblyInstrInfo *TII) { + DEBUG(dbgs() << "Move and tee for multi-use:"; Def->dump()); + + // Move Def into place. + MBB.splice(Insert, &MBB, Def); + LIS.handleMove(*Def); + + // Create the Tee and attach the registers. + const auto *RegClass = MRI.getRegClass(Reg); + unsigned TeeReg = MRI.createVirtualRegister(RegClass); + unsigned DefReg = MRI.createVirtualRegister(RegClass); + MachineOperand &DefMO = Def->getOperand(0); + MachineInstr *Tee = BuildMI(MBB, Insert, Insert->getDebugLoc(), + TII->get(GetTeeLocalOpcode(RegClass)), TeeReg) + .addReg(Reg, RegState::Define) + .addReg(DefReg, getUndefRegState(DefMO.isDead())); + Op.setReg(TeeReg); + DefMO.setReg(DefReg); + SlotIndex TeeIdx = LIS.InsertMachineInstrInMaps(*Tee).getRegSlot(); + SlotIndex DefIdx = LIS.getInstructionIndex(*Def).getRegSlot(); + + // Tell LiveIntervals we moved the original vreg def from Def to Tee. + LiveInterval &LI = LIS.getInterval(Reg); + LiveInterval::iterator I = LI.FindSegmentContaining(DefIdx); + VNInfo *ValNo = LI.getVNInfoAt(DefIdx); + I->start = TeeIdx; + ValNo->def = TeeIdx; + ShrinkToUses(LI, LIS); + + // Finish stackifying the new regs. 
+ LIS.createAndComputeVirtRegInterval(TeeReg); + LIS.createAndComputeVirtRegInterval(DefReg); + MFI.stackifyVReg(DefReg); + MFI.stackifyVReg(TeeReg); + ImposeStackOrdering(Def); + ImposeStackOrdering(Tee); + + DEBUG(dbgs() << " - Replaced register: "; Def->dump()); + DEBUG(dbgs() << " - Tee instruction: "; Tee->dump()); + return Def; +} + +namespace { +/// A stack for walking the tree of instructions being built, visiting the +/// MachineOperands in DFS order. +class TreeWalkerState { + typedef MachineInstr::mop_iterator mop_iterator; + typedef std::reverse_iterator<mop_iterator> mop_reverse_iterator; + typedef iterator_range<mop_reverse_iterator> RangeTy; + SmallVector<RangeTy, 4> Worklist; + +public: + explicit TreeWalkerState(MachineInstr *Insert) { + const iterator_range<mop_iterator> &Range = Insert->explicit_uses(); + if (Range.begin() != Range.end()) + Worklist.push_back(reverse(Range)); + } + + bool Done() const { return Worklist.empty(); } + + MachineOperand &Pop() { + RangeTy &Range = Worklist.back(); + MachineOperand &Op = *Range.begin(); + Range = drop_begin(Range, 1); + if (Range.begin() == Range.end()) + Worklist.pop_back(); + assert((Worklist.empty() || + Worklist.back().begin() != Worklist.back().end()) && + "Empty ranges shouldn't remain in the worklist"); + return Op; + } + + /// Push Instr's operands onto the stack to be visited. + void PushOperands(MachineInstr *Instr) { + const iterator_range<mop_iterator> &Range(Instr->explicit_uses()); + if (Range.begin() != Range.end()) + Worklist.push_back(reverse(Range)); + } + + /// Some of Instr's operands are on the top of the stack; remove them and + /// re-insert them starting from the beginning (because we've commuted them). + void ResetTopOperands(MachineInstr *Instr) { + assert(HasRemainingOperands(Instr) && + "Reseting operands should only be done when the instruction has " + "an operand still on the stack"); + Worklist.back() = reverse(Instr->explicit_uses()); + } + + /// Test whether Instr has operands remaining to be visited at the top of + /// the stack. + bool HasRemainingOperands(const MachineInstr *Instr) const { + if (Worklist.empty()) + return false; + const RangeTy &Range = Worklist.back(); + return Range.begin() != Range.end() && Range.begin()->getParent() == Instr; + } + + /// Test whether the given register is present on the stack, indicating an + /// operand in the tree that we haven't visited yet. Moving a definition of + /// Reg to a point in the tree after that would change its value. + /// + /// This is needed as a consequence of using implicit get_locals for + /// uses and implicit set_locals for defs. + bool IsOnStack(unsigned Reg) const { + for (const RangeTy &Range : Worklist) + for (const MachineOperand &MO : Range) + if (MO.isReg() && MO.getReg() == Reg) + return true; + return false; + } +}; + +/// State to keep track of whether commuting is in flight or whether it's been +/// tried for the current instruction and didn't work. +class CommutingState { + /// There are effectively three states: the initial state where we haven't + /// started commuting anything and we don't know anything yet, the tenative + /// state where we've commuted the operands of the current instruction and are + /// revisting it, and the declined state where we've reverted the operands + /// back to their original order and will no longer commute it further. + bool TentativelyCommuting; + bool Declined; + + /// During the tentative state, these hold the operand indices of the commuted + /// operands. 
+ unsigned Operand0, Operand1; + +public: + CommutingState() : TentativelyCommuting(false), Declined(false) {} + + /// Stackification for an operand was not successful due to ordering + /// constraints. If possible, and if we haven't already tried it and declined + /// it, commute Insert's operands and prepare to revisit it. + void MaybeCommute(MachineInstr *Insert, TreeWalkerState &TreeWalker, + const WebAssemblyInstrInfo *TII) { + if (TentativelyCommuting) { + assert(!Declined && + "Don't decline commuting until you've finished trying it"); + // Commuting didn't help. Revert it. + TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1); + TentativelyCommuting = false; + Declined = true; + } else if (!Declined && TreeWalker.HasRemainingOperands(Insert)) { + Operand0 = TargetInstrInfo::CommuteAnyOperandIndex; + Operand1 = TargetInstrInfo::CommuteAnyOperandIndex; + if (TII->findCommutedOpIndices(*Insert, Operand0, Operand1)) { + // Tentatively commute the operands and try again. + TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1); + TreeWalker.ResetTopOperands(Insert); + TentativelyCommuting = true; + Declined = false; + } + } + } + + /// Stackification for some operand was successful. Reset to the default + /// state. + void Reset() { + TentativelyCommuting = false; + Declined = false; + } +}; +} // end anonymous namespace + bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "********** Register Stackifying **********\n" "********** Function: " @@ -140,7 +708,10 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; MachineRegisterInfo &MRI = MF.getRegInfo(); WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>(); + const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); + const auto *TRI = MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo(); AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults(); + MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); LiveIntervals &LIS = getAnalysis<LiveIntervals>(); // Walk the instructions from the bottom up. Currently we don't look past @@ -151,33 +722,37 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { // iterating over it and the end iterator may change. for (auto MII = MBB.rbegin(); MII != MBB.rend(); ++MII) { MachineInstr *Insert = &*MII; - // Don't nest anything inside a phi. - if (Insert->getOpcode() == TargetOpcode::PHI) - break; - // Don't nest anything inside an inline asm, because we don't have // constraints for $push inputs. if (Insert->getOpcode() == TargetOpcode::INLINEASM) - break; + continue; + + // Ignore debugging intrinsics. + if (Insert->getOpcode() == TargetOpcode::DBG_VALUE) + continue; // Iterate through the inputs in reverse order, since we'll be pulling // operands off the stack in LIFO order. - bool AnyStackified = false; - for (MachineOperand &Op : reverse(Insert->uses())) { + CommutingState Commuting; + TreeWalkerState TreeWalker(Insert); + while (!TreeWalker.Done()) { + MachineOperand &Op = TreeWalker.Pop(); + // We're only interested in explicit virtual register operands. - if (!Op.isReg() || Op.isImplicit() || !Op.isUse()) + if (!Op.isReg()) continue; unsigned Reg = Op.getReg(); - - // Only consider registers with a single definition. - // TODO: Eventually we may relax this, to stackify phi transfers. 
- MachineInstr *Def = MRI.getUniqueVRegDef(Reg); - if (!Def) + assert(Op.isUse() && "explicit_uses() should only iterate over uses"); + assert(!Op.isImplicit() && + "explicit_uses() should only iterate over explicit operands"); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) continue; - // There's no use in nesting implicit defs inside anything. - if (Def->getOpcode() == TargetOpcode::IMPLICIT_DEF) + // Identify the definition for this register at this point. Most + // registers are in SSA form here so we try a quick MRI query first. + MachineInstr *Def = GetVRegDef(Reg, Insert, MRI, LIS); + if (!Def) continue; // Don't nest an INLINE_ASM def into anything, because we don't have @@ -185,10 +760,6 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { if (Def->getOpcode() == TargetOpcode::INLINEASM) continue; - // Don't nest PHIs inside of anything. - if (Def->getOpcode() == TargetOpcode::PHI) - continue; - // Argument instructions represent live-in registers and not real // instructions. if (Def->getOpcode() == WebAssembly::ARGUMENT_I32 || @@ -197,38 +768,53 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { Def->getOpcode() == WebAssembly::ARGUMENT_F64) continue; - // Single-use expression trees require defs that have one use. - // TODO: Eventually we'll relax this, to take advantage of set_local - // returning its result. - if (!MRI.hasOneUse(Reg)) - continue; - - // For now, be conservative and don't look across block boundaries. - // TODO: Be more aggressive? - if (Def->getParent() != &MBB) + // Decide which strategy to take. Prefer to move a single-use value + // over cloning it, and prefer cloning over introducing a tee_local. + // For moving, we require the def to be in the same block as the use; + // this makes things simpler (LiveIntervals' handleMove function only + // supports intra-block moves) and it's MachineSink's job to catch all + // the sinking opportunities anyway. + bool SameBlock = Def->getParent() == &MBB; + bool CanMove = SameBlock && IsSafeToMove(Def, Insert, AA, LIS, MRI) && + !TreeWalker.IsOnStack(Reg); + if (CanMove && HasOneUse(Reg, Def, MRI, MDT, LIS)) { + Insert = MoveForSingleUse(Reg, Op, Def, MBB, Insert, LIS, MFI, MRI); + } else if (ShouldRematerialize(*Def, AA, TII)) { + Insert = + RematerializeCheapDef(Reg, Op, *Def, MBB, Insert->getIterator(), + LIS, MFI, MRI, TII, TRI); + } else if (CanMove && + OneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT, LIS, MFI)) { + Insert = MoveAndTeeForMultiUse(Reg, Op, Def, MBB, Insert, LIS, MFI, + MRI, TII); + } else { + // We failed to stackify the operand. If the problem was ordering + // constraints, Commuting may be able to help. + if (!CanMove && SameBlock) + Commuting.MaybeCommute(Insert, TreeWalker, TII); + // Proceed to the next operand. continue; + } - // Don't move instructions that have side effects or memory dependencies - // or other complications. - if (!IsSafeToMove(Def, Insert, AA, LIS, MRI)) - continue; + // We stackified an operand. Add the defining instruction's operands to + // the worklist stack now to continue to build an ever deeper tree. + Commuting.Reset(); + TreeWalker.PushOperands(Insert); + } + // If we stackified any operands, skip over the tree to start looking for + // the next instruction we can build a tree on. 
+ if (Insert != &*MII) { + ImposeStackOrdering(&*MII); + MII = std::prev( + llvm::make_reverse_iterator(MachineBasicBlock::iterator(Insert))); Changed = true; - AnyStackified = true; - // Move the def down and nest it in the current instruction. - MBB.splice(Insert, &MBB, Def); - LIS.handleMove(Def); - MFI.stackifyVReg(Reg); - ImposeStackOrdering(Def); - Insert = Def; } - if (AnyStackified) - ImposeStackOrdering(&*MII); } } - // If we used EXPR_STACK anywhere, add it to the live-in sets everywhere - // so that it never looks like a use-before-def. + // If we used EXPR_STACK anywhere, add it to the live-in sets everywhere so + // that it never looks like a use-before-def. if (Changed) { MF.getRegInfo().addLiveIn(WebAssembly::EXPR_STACK); for (MachineBasicBlock &MBB : MF) @@ -236,30 +822,30 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { } #ifndef NDEBUG - // Verify that pushes and pops are performed in FIFO order. + // Verify that pushes and pops are performed in LIFO order. SmallVector<unsigned, 0> Stack; for (MachineBasicBlock &MBB : MF) { for (MachineInstr &MI : MBB) { + if (MI.isDebugValue()) + continue; for (MachineOperand &MO : reverse(MI.explicit_operands())) { if (!MO.isReg()) continue; - unsigned VReg = MO.getReg(); - - // Don't stackify physregs like SP or FP. - if (!TargetRegisterInfo::isVirtualRegister(VReg)) - continue; + unsigned Reg = MO.getReg(); - if (MFI.isVRegStackified(VReg)) { + if (MFI.isVRegStackified(Reg)) { if (MO.isDef()) - Stack.push_back(VReg); + Stack.push_back(Reg); else - assert(Stack.pop_back_val() == VReg); + assert(Stack.pop_back_val() == Reg && + "Register stack pop should be paired with a push"); } } } // TODO: Generalize this code to support keeping values on the stack across // basic block boundaries. - assert(Stack.empty()); + assert(Stack.empty() && + "Register stack pushes and pops should be balanced"); } #endif diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp index 90d8dda530b..239fe89b7ef 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp @@ -52,43 +52,74 @@ WebAssemblyRegisterInfo::getReservedRegs(const MachineFunction & /*MF*/) const { } void WebAssemblyRegisterInfo::eliminateFrameIndex( - MachineBasicBlock::iterator II, int SPAdj, - unsigned FIOperandNum, RegScavenger * /*RS*/) const { + MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, + RegScavenger * /*RS*/) const { assert(SPAdj == 0); MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + MachineRegisterInfo &MRI = MF.getRegInfo(); int FrameIndex = MI.getOperand(FIOperandNum).getIndex(); - const MachineFrameInfo& MFI = *MF.getFrameInfo(); + const MachineFrameInfo &MFI = *MF.getFrameInfo(); int64_t FrameOffset = MFI.getStackSize() + MFI.getObjectOffset(FrameIndex); - if (MI.mayLoadOrStore()) { - // If this is a load or store, make it relative to SP and fold the frame - // offset directly in. + // If this is the address operand of a load or store, make it relative to SP + // and fold the frame offset directly in. 
+ if (MI.mayLoadOrStore() && FIOperandNum == WebAssembly::MemOpAddressOperandNo) { assert(FrameOffset >= 0 && MI.getOperand(1).getImm() >= 0); int64_t Offset = MI.getOperand(1).getImm() + FrameOffset; - if (static_cast<uint64_t>(Offset) > std::numeric_limits<uint32_t>::max()) { - // If this happens the program is invalid, but better to error here than - // generate broken code. - report_fatal_error("Memory offset field overflow"); + if (static_cast<uint64_t>(Offset) <= std::numeric_limits<uint32_t>::max()) { + MI.getOperand(FIOperandNum - 1).setImm(Offset); + MI.getOperand(FIOperandNum) + .ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false); + return; } - MI.getOperand(1).setImm(Offset); - MI.getOperand(2).ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false); - } else { - // Otherwise create an i32.add SP, offset and make it the operand. - auto &MRI = MF.getRegInfo(); - const auto *TII = MF.getSubtarget().getInstrInfo(); + } + + // If this is an address being added to a constant, fold the frame offset + // into the constant. + if (MI.getOpcode() == WebAssembly::ADD_I32) { + MachineOperand &OtherMO = MI.getOperand(3 - FIOperandNum); + if (OtherMO.isReg()) { + unsigned OtherMOReg = OtherMO.getReg(); + if (TargetRegisterInfo::isVirtualRegister(OtherMOReg)) { + MachineInstr *Def = MF.getRegInfo().getUniqueVRegDef(OtherMOReg); + // TODO: For now we just opportunistically do this in the case where + // the CONST_I32 happens to have exactly one def and one use. We + // should generalize this to optimize in more cases. + if (Def && Def->getOpcode() == WebAssembly::CONST_I32 && + MRI.hasOneNonDBGUse(Def->getOperand(0).getReg())) { + MachineOperand &ImmMO = Def->getOperand(1); + ImmMO.setImm(ImmMO.getImm() + uint32_t(FrameOffset)); + MI.getOperand(FIOperandNum) + .ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false); + return; + } + } + } + } + + // Otherwise create an i32.add SP, offset and make it the operand. + const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo(); - unsigned OffsetReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); - BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(WebAssembly::CONST_I32), OffsetReg) + unsigned FIRegOperand = WebAssembly::SP32; + if (FrameOffset) { + // Create i32.add SP, offset and make it the operand. + const TargetRegisterClass *PtrRC = + MRI.getTargetRegisterInfo()->getPointerRegClass(MF); + unsigned OffsetOp = MRI.createVirtualRegister(PtrRC); + BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::CONST_I32), + OffsetOp) .addImm(FrameOffset); - BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(WebAssembly::ADD_I32), OffsetReg) + FIRegOperand = MRI.createVirtualRegister(PtrRC); + BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::ADD_I32), + FIRegOperand) .addReg(WebAssembly::SP32) - .addReg(OffsetReg); - MI.getOperand(FIOperandNum).ChangeToRegister(OffsetReg, /*IsDef=*/false); + .addReg(OffsetOp); } + MI.getOperand(FIOperandNum).ChangeToRegister(FIRegOperand, /*IsDef=*/false); } unsigned diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp new file mode 100644 index 00000000000..11bda47eac5 --- /dev/null +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp @@ -0,0 +1,97 @@ +//===-- WebAssemblyReplacePhysRegs.cpp - Replace phys regs with virt regs -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file implements a pass that replaces physical registers with +/// virtual registers. +/// +/// LLVM expects certain physical registers, such as a stack pointer. However, +/// WebAssembly doesn't actually have such physical registers. This pass is run +/// once LLVM no longer needs these registers, and replaces them with virtual +/// registers, so they can participate in register stackifying and coloring in +/// the normal way. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-replace-phys-regs" + +namespace { +class WebAssemblyReplacePhysRegs final : public MachineFunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyReplacePhysRegs() : MachineFunctionPass(ID) {} + +private: + const char *getPassName() const override { + return "WebAssembly Replace Physical Registers"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // end anonymous namespace + +char WebAssemblyReplacePhysRegs::ID = 0; +FunctionPass *llvm::createWebAssemblyReplacePhysRegs() { + return new WebAssemblyReplacePhysRegs(); +} + +bool WebAssemblyReplacePhysRegs::runOnMachineFunction(MachineFunction &MF) { + DEBUG({ + dbgs() << "********** Replace Physical Registers **********\n" + << "********** Function: " << MF.getName() << '\n'; + }); + + MachineRegisterInfo &MRI = MF.getRegInfo(); + const auto &TRI = *MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo(); + bool Changed = false; + + assert(!mustPreserveAnalysisID(LiveIntervalsID) && + "LiveIntervals shouldn't be active yet!"); + // We don't preserve SSA or liveness. + MRI.leaveSSA(); + MRI.invalidateLiveness(); + + for (unsigned PReg = WebAssembly::NoRegister + 1; + PReg < WebAssembly::NUM_TARGET_REGS; ++PReg) { + // Skip fake registers that are never used explicitly. + if (PReg == WebAssembly::EXPR_STACK || PReg == WebAssembly::ARGUMENTS) + continue; + + // Replace explicit uses of the physical register with a virtual register. + const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(PReg); + unsigned VReg = WebAssembly::NoRegister; + for (auto I = MRI.reg_begin(PReg), E = MRI.reg_end(); I != E; ) { + MachineOperand &MO = *I++; + if (!MO.isImplicit()) { + if (VReg == WebAssembly::NoRegister) + VReg = MRI.createVirtualRegister(RC); + MO.setReg(VReg); + Changed = true; + } + } + } + + return Changed; +} diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h b/gnu/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h index 13d96671276..533c66b7a22 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h @@ -9,18 +9,18 @@ /// /// \file /// \brief This file defines the WebAssembly subclass for -/// TargetSelectionDAGInfo. +/// SelectionDAGTargetInfo. 
/// //===----------------------------------------------------------------------===// #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYSELECTIONDAGINFO_H #define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYSELECTIONDAGINFO_H -#include "llvm/Target/TargetSelectionDAGInfo.h" +#include "llvm/CodeGen/SelectionDAGTargetInfo.h" namespace llvm { -class WebAssemblySelectionDAGInfo final : public TargetSelectionDAGInfo { +class WebAssemblySelectionDAGInfo final : public SelectionDAGTargetInfo { public: ~WebAssemblySelectionDAGInfo() override; }; diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp b/gnu/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp new file mode 100644 index 00000000000..4ebea68c58a --- /dev/null +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp @@ -0,0 +1,114 @@ +//=- WebAssemblySetP2AlignOperands.cpp - Set alignments on loads and stores -=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file sets the p2align operands on load and store instructions. +/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-set-p2align-operands" + +namespace { +class WebAssemblySetP2AlignOperands final : public MachineFunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblySetP2AlignOperands() : MachineFunctionPass(ID) {} + + const char *getPassName() const override { + return "WebAssembly Set p2align Operands"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addPreserved<MachineBlockFrequencyInfo>(); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // end anonymous namespace + +char WebAssemblySetP2AlignOperands::ID = 0; +FunctionPass *llvm::createWebAssemblySetP2AlignOperands() { + return new WebAssemblySetP2AlignOperands(); +} + +bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) { + DEBUG({ + dbgs() << "********** Set p2align Operands **********\n" + << "********** Function: " << MF.getName() << '\n'; + }); + + bool Changed = false; + + for (auto &MBB : MF) { + for (auto &MI : MBB) { + switch (MI.getOpcode()) { + case WebAssembly::LOAD_I32: + case WebAssembly::LOAD_I64: + case WebAssembly::LOAD_F32: + case WebAssembly::LOAD_F64: + case WebAssembly::LOAD8_S_I32: + case WebAssembly::LOAD8_U_I32: + case WebAssembly::LOAD16_S_I32: + case WebAssembly::LOAD16_U_I32: + case WebAssembly::LOAD8_S_I64: + case WebAssembly::LOAD8_U_I64: + case WebAssembly::LOAD16_S_I64: + case WebAssembly::LOAD16_U_I64: + case WebAssembly::LOAD32_S_I64: + case WebAssembly::LOAD32_U_I64: + case WebAssembly::STORE_I32: + case WebAssembly::STORE_I64: + case WebAssembly::STORE_F32: + case WebAssembly::STORE_F64: + case WebAssembly::STORE8_I32: + case WebAssembly::STORE16_I32: + case WebAssembly::STORE8_I64: + case 
WebAssembly::STORE16_I64: + case WebAssembly::STORE32_I64: { + assert(MI.getOperand(3).getImm() == 0 && + "ISel should set p2align operands to 0"); + assert(MI.hasOneMemOperand() && + "Load and store instructions have exactly one mem operand"); + assert((*MI.memoperands_begin())->getSize() == + (UINT64_C(1) + << WebAssembly::GetDefaultP2Align(MI.getOpcode())) && + "Default p2align value should be natural"); + assert(MI.getDesc().OpInfo[3].OperandType == + WebAssembly::OPERAND_P2ALIGN && + "Load and store instructions should have a p2align operand"); + uint64_t P2Align = Log2_64((*MI.memoperands_begin())->getAlignment()); + + // WebAssembly does not currently support supernatural alignment. + P2Align = std::min( + P2Align, uint64_t(WebAssembly::GetDefaultP2Align(MI.getOpcode()))); + + MI.getOperand(3).setImm(P2Align); + break; + } + default: + break; + } + } + } + + return Changed; +} diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp index 4e08b2b079e..1e9a773ae62 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp @@ -17,12 +17,19 @@ /// potentially also exposing the store to register stackifying. These both can /// reduce get_local/set_local traffic. /// +/// This pass also performs this optimization for memcpy, memmove, and memset +/// calls, since the LLVM intrinsics for these return void so they can't use the +/// returned attribute and consequently aren't handled by the OptimizeReturned +/// pass. +/// //===----------------------------------------------------------------------===// #include "WebAssembly.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblySubtarget.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -49,6 +56,10 @@ public: AU.addPreserved<MachineBlockFrequencyInfo>(); AU.addRequired<MachineDominatorTree>(); AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<LiveIntervals>(); + AU.addPreserved<SlotIndexes>(); + AU.addPreserved<LiveIntervals>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -63,17 +74,127 @@ FunctionPass *llvm::createWebAssemblyStoreResults() { return new WebAssemblyStoreResults(); } +// Replace uses of FromReg with ToReg if they are dominated by MI. +static bool ReplaceDominatedUses(MachineBasicBlock &MBB, MachineInstr &MI, + unsigned FromReg, unsigned ToReg, + const MachineRegisterInfo &MRI, + MachineDominatorTree &MDT, + LiveIntervals &LIS) { + bool Changed = false; + + LiveInterval *FromLI = &LIS.getInterval(FromReg); + LiveInterval *ToLI = &LIS.getInterval(ToReg); + + SlotIndex FromIdx = LIS.getInstructionIndex(MI).getRegSlot(); + VNInfo *FromVNI = FromLI->getVNInfoAt(FromIdx); + + SmallVector<SlotIndex, 4> Indices; + + for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) { + MachineOperand &O = *I++; + MachineInstr *Where = O.getParent(); + + // Check that MI dominates the instruction in the normal way. + if (&MI == Where || !MDT.dominates(&MI, Where)) + continue; + + // If this use gets a different value, skip it. 
+ SlotIndex WhereIdx = LIS.getInstructionIndex(*Where); + VNInfo *WhereVNI = FromLI->getVNInfoAt(WhereIdx); + if (WhereVNI && WhereVNI != FromVNI) + continue; + + // Make sure ToReg isn't clobbered before it gets there. + VNInfo *ToVNI = ToLI->getVNInfoAt(WhereIdx); + if (ToVNI && ToVNI != FromVNI) + continue; + + Changed = true; + DEBUG(dbgs() << "Setting operand " << O << " in " << *Where << " from " + << MI << "\n"); + O.setReg(ToReg); + + // If the store's def was previously dead, it is no longer. + if (!O.isUndef()) { + MI.getOperand(0).setIsDead(false); + + Indices.push_back(WhereIdx.getRegSlot()); + } + } + + if (Changed) { + // Extend ToReg's liveness. + LIS.extendToIndices(*ToLI, Indices); + + // Shrink FromReg's liveness. + LIS.shrinkToUses(FromLI); + + // If we replaced all dominated uses, FromReg is now killed at MI. + if (!FromLI->liveAt(FromIdx.getDeadSlot())) + MI.addRegisterKilled(FromReg, + MBB.getParent()->getSubtarget<WebAssemblySubtarget>() + .getRegisterInfo()); + } + + return Changed; +} + +static bool optimizeStore(MachineBasicBlock &MBB, MachineInstr &MI, + const MachineRegisterInfo &MRI, + MachineDominatorTree &MDT, + LiveIntervals &LIS) { + unsigned ToReg = MI.getOperand(0).getReg(); + unsigned FromReg = MI.getOperand(WebAssembly::StoreValueOperandNo).getReg(); + return ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT, LIS); +} + +static bool optimizeCall(MachineBasicBlock &MBB, MachineInstr &MI, + const MachineRegisterInfo &MRI, + MachineDominatorTree &MDT, + LiveIntervals &LIS, + const WebAssemblyTargetLowering &TLI, + const TargetLibraryInfo &LibInfo) { + MachineOperand &Op1 = MI.getOperand(1); + if (!Op1.isSymbol()) + return false; + + StringRef Name(Op1.getSymbolName()); + bool callReturnsInput = Name == TLI.getLibcallName(RTLIB::MEMCPY) || + Name == TLI.getLibcallName(RTLIB::MEMMOVE) || + Name == TLI.getLibcallName(RTLIB::MEMSET); + if (!callReturnsInput) + return false; + + LibFunc::Func Func; + if (!LibInfo.getLibFunc(Name, Func)) + return false; + + unsigned FromReg = MI.getOperand(2).getReg(); + unsigned ToReg = MI.getOperand(0).getReg(); + if (MRI.getRegClass(FromReg) != MRI.getRegClass(ToReg)) + report_fatal_error("Store results: call to builtin function with wrong " + "signature, from/to mismatch"); + return ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT, LIS); +} + bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) { DEBUG({ dbgs() << "********** Store Results **********\n" << "********** Function: " << MF.getName() << '\n'; }); - const MachineRegisterInfo &MRI = MF.getRegInfo(); + MachineRegisterInfo &MRI = MF.getRegInfo(); MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); + const WebAssemblyTargetLowering &TLI = + *MF.getSubtarget<WebAssemblySubtarget>().getTargetLowering(); + const auto &LibInfo = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + LiveIntervals &LIS = getAnalysis<LiveIntervals>(); bool Changed = false; - assert(MRI.isSSA() && "StoreResults depends on SSA form"); + // We don't preserve SSA form. 
+ MRI.leaveSSA(); + + assert(MRI.tracksLiveness() && "StoreResults expects liveness tracking"); for (auto &MBB : MF) { DEBUG(dbgs() << "Basic Block: " << MBB.getName() << '\n'); @@ -90,33 +211,12 @@ bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) { case WebAssembly::STORE_F64: case WebAssembly::STORE_I32: case WebAssembly::STORE_I64: - unsigned ToReg = MI.getOperand(0).getReg(); - unsigned FromReg = MI.getOperand(3).getReg(); - for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) { - MachineOperand &O = *I++; - MachineInstr *Where = O.getParent(); - if (Where->getOpcode() == TargetOpcode::PHI) { - // PHIs use their operands on their incoming CFG edges rather than - // in their parent blocks. Get the basic block paired with this use - // of FromReg and check that MI's block dominates it. - MachineBasicBlock *Pred = - Where->getOperand(&O - &Where->getOperand(0) + 1).getMBB(); - if (!MDT.dominates(&MBB, Pred)) - continue; - } else { - // For a non-PHI, check that MI dominates the instruction in the - // normal way. - if (&MI == Where || !MDT.dominates(&MI, Where)) - continue; - } - Changed = true; - DEBUG(dbgs() << "Setting operand " << O << " in " << *Where - << " from " << MI << "\n"); - O.setReg(ToReg); - // If the store's def was previously dead, it is no longer. But the - // dead flag shouldn't be set yet. - assert(!MI.getOperand(0).isDead() && "Dead flag set on store result"); - } + Changed |= optimizeStore(MBB, MI, MRI, MDT, LIS); + break; + case WebAssembly::CALL_I32: + case WebAssembly::CALL_I64: + Changed |= optimizeCall(MBB, MI, MRI, MDT, LIS, TLI, LibInfo); + break; } } diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/gnu/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp index cb2d5a63a19..ce39051b055 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp @@ -13,9 +13,9 @@ /// //===----------------------------------------------------------------------===// -#include "WebAssemblyInstrInfo.h" -#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" #include "WebAssemblySubtarget.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "WebAssemblyInstrInfo.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; @@ -45,5 +45,11 @@ WebAssemblySubtarget::WebAssemblySubtarget(const Triple &TT, InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(), TLInfo(TM, *this) {} -bool WebAssemblySubtarget::enableMachineScheduler() const { return true; } +bool WebAssemblySubtarget::enableMachineScheduler() const { + // Disable the MachineScheduler for now. Even with ShouldTrackPressure set and + // enableMachineSchedDefaultSched overridden, it appears to have an overall + // negative effect for the kinds of register optimizations we're doing. 
+ return false; +} + bool WebAssemblySubtarget::useAA() const { return true; } diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index b290b4bf744..32154af3c1c 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -20,8 +20,8 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Function.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Scalar.h" @@ -39,16 +39,23 @@ extern "C" void LLVMInitializeWebAssemblyTarget() { // WebAssembly Lowering public interface. //===----------------------------------------------------------------------===// +static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) { + if (!RM.hasValue()) + return Reloc::PIC_; + return *RM; +} + /// Create an WebAssembly architecture model. /// WebAssemblyTargetMachine::WebAssemblyTargetMachine( const Target &T, const Triple &TT, StringRef CPU, StringRef FS, - const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) + const TargetOptions &Options, Optional<Reloc::Model> RM, + CodeModel::Model CM, CodeGenOpt::Level OL) : LLVMTargetMachine(T, TT.isArch64Bit() ? "e-m:e-p:64:64-i64:64-n32:64-S128" : "e-m:e-p:32:32-i64:64-n32:64-S128", - TT, CPU, FS, Options, RM, CM, OL), + TT, CPU, FS, Options, getEffectiveRelocModel(RM), + CM, OL), TLOF(make_unique<WebAssemblyTargetObjectFile>()) { // WebAssembly type-checks expressions, but a noreturn function with a return // type that doesn't match the context will cause a check failure. So we lower @@ -58,9 +65,9 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine( initAsmInfo(); - // We need a reducible CFG, so disable some optimizations which tend to - // introduce irreducibility. - setRequiresStructuredCFG(true); + // Note that we don't use setRequiresStructuredCFG(true). It disables + // optimizations than we're ok with, and want, such as critical edge + // splitting and tail merging. } WebAssemblyTargetMachine::~WebAssemblyTargetMachine() {} @@ -103,9 +110,8 @@ public: void addIRPasses() override; bool addInstSelector() override; - bool addILPOpts() override; - void addPreRegAlloc() override; void addPostRegAlloc() override; + bool addGCPasses() override { return false; } void addPreEmitPass() override; }; } // end anonymous namespace @@ -140,7 +146,8 @@ void WebAssemblyPassConfig::addIRPasses() { addPass(createAtomicExpandPass(TM)); // Optimize "returned" function attributes. - addPass(createWebAssemblyOptimizeReturned()); + if (getOptLevel() != CodeGenOpt::None) + addPass(createWebAssemblyOptimizeReturned()); TargetPassConfig::addIRPasses(); } @@ -153,58 +160,75 @@ bool WebAssemblyPassConfig::addInstSelector() { // so that we can fix up the ARGUMENT instructions before anything else // sees them in the wrong place. addPass(createWebAssemblyArgumentMove()); + // Set the p2align operands. This information is present during ISel, however + // it's inconvenient to collect. Collect it now, and update the immediate + // operands. 
+ addPass(createWebAssemblySetP2AlignOperands()); return false; } -bool WebAssemblyPassConfig::addILPOpts() { - (void)TargetPassConfig::addILPOpts(); - return true; -} - -void WebAssemblyPassConfig::addPreRegAlloc() { - TargetPassConfig::addPreRegAlloc(); - - // Prepare store instructions for register stackifying. - addPass(createWebAssemblyStoreResults()); -} - void WebAssemblyPassConfig::addPostRegAlloc() { // TODO: The following CodeGen passes don't currently support code containing // virtual registers. Consider removing their restrictions and re-enabling // them. - // - // We use our own PrologEpilogInserter which is very slightly modified to - // tolerate virtual registers. - disablePass(&PrologEpilogCodeInserterID); - // Fails with: should be run after register allocation. - disablePass(&MachineCopyPropagationID); - // Mark registers as representing wasm's expression stack. - addPass(createWebAssemblyRegStackify()); + // Has no asserts of its own, but was not written to handle virtual regs. + disablePass(&ShrinkWrapID); - // Run the register coloring pass to reduce the total number of registers. - addPass(createWebAssemblyRegColoring()); + // These functions all require the AllVRegsAllocated property. + disablePass(&MachineCopyPropagationID); + disablePass(&PostRASchedulerID); + disablePass(&FuncletLayoutID); + disablePass(&StackMapLivenessID); + disablePass(&LiveDebugValuesID); + disablePass(&PatchableFunctionID); TargetPassConfig::addPostRegAlloc(); - - // Run WebAssembly's version of the PrologEpilogInserter. Target-independent - // PEI runs after PostRegAlloc and after ShrinkWrap. Putting it here will run - // PEI before ShrinkWrap but otherwise in the same position in the order. - addPass(createWebAssemblyPEI()); } void WebAssemblyPassConfig::addPreEmitPass() { TargetPassConfig::addPreEmitPass(); + // Now that we have a prologue and epilogue and all frame indices are + // rewritten, eliminate SP and FP. This allows them to be stackified, + // colored, and numbered with the rest of the registers. + addPass(createWebAssemblyReplacePhysRegs()); + + if (getOptLevel() != CodeGenOpt::None) { + // LiveIntervals isn't commonly run this late. Re-establish preconditions. + addPass(createWebAssemblyPrepareForLiveIntervals()); + + // Depend on LiveIntervals and perform some optimizations on it. + addPass(createWebAssemblyOptimizeLiveIntervals()); + + // Prepare store instructions for register stackifying. + addPass(createWebAssemblyStoreResults()); + + // Mark registers as representing wasm's expression stack. This is a key + // code-compression technique in WebAssembly. We run this pass (and + // StoreResults above) very late, so that it sees as much code as possible, + // including code emitted by PEI and expanded by late tail duplication. + addPass(createWebAssemblyRegStackify()); + + // Run the register coloring pass to reduce the total number of registers. + // This runs after stackification so that it doesn't consider registers + // that become stackified. + addPass(createWebAssemblyRegColoring()); + } + + // Eliminate multiple-entry loops. + addPass(createWebAssemblyFixIrreducibleControlFlow()); + // Put the CFG in structured form; insert BLOCK and LOOP markers. addPass(createWebAssemblyCFGStackify()); // Lower br_unless into br_if. addPass(createWebAssemblyLowerBrUnless()); + // Perform the very last peephole optimizations on the code. 
+ if (getOptLevel() != CodeGenOpt::None) + addPass(createWebAssemblyPeephole()); + // Create a mapping from LLVM CodeGen virtual registers to wasm registers. addPass(createWebAssemblyRegNumbering()); - - // Perform the very last peephole optimizations on the code. - addPass(createWebAssemblyPeephole()); } diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h index 3226edcdc61..52a2ef78736 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.h @@ -28,7 +28,7 @@ class WebAssemblyTargetMachine final : public LLVMTargetMachine { public: WebAssemblyTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, + Optional<Reloc::Model> RM, CodeModel::Model CM, CodeGenOpt::Level OL); ~WebAssemblyTargetMachine() override; @@ -44,6 +44,8 @@ public: /// \brief Get the TargetIRAnalysis for this target. TargetIRAnalysis getTargetIRAnalysis() override; + + bool usesPhysRegsForPEI() const override { return false; } }; } // end namespace llvm diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index 35663171192..bf546dab5fb 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -25,3 +25,59 @@ WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const { assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); return TargetTransformInfo::PSK_FastHardware; } + +unsigned WebAssemblyTTIImpl::getNumberOfRegisters(bool Vector) { + unsigned Result = BaseT::getNumberOfRegisters(Vector); + + // For SIMD, use at least 16 registers, as a rough guess. + if (Vector) + Result = std::max(Result, 16u); + + return Result; +} + +unsigned WebAssemblyTTIImpl::getRegisterBitWidth(bool Vector) { + if (Vector && getST()->hasSIMD128()) + return 128; + + return 64; +} + +unsigned WebAssemblyTTIImpl::getArithmeticInstrCost( + unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info, + TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, + TTI::OperandValueProperties Opd2PropInfo) { + + unsigned Cost = BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost( + Opcode, Ty, Opd1Info, Opd2Info, Opd1PropInfo, Opd2PropInfo); + + if (VectorType *VTy = dyn_cast<VectorType>(Ty)) { + switch (Opcode) { + case Instruction::LShr: + case Instruction::AShr: + case Instruction::Shl: + // SIMD128's shifts currently only accept a scalar shift count. For each + // element, we'll need to extract, op, insert. The following is a rough + // approxmation. + if (Opd2Info != TTI::OK_UniformValue && + Opd2Info != TTI::OK_UniformConstantValue) + Cost = VTy->getNumElements() * + (TargetTransformInfo::TCC_Basic + + getArithmeticInstrCost(Opcode, VTy->getElementType()) + + TargetTransformInfo::TCC_Basic); + break; + } + } + return Cost; +} + +unsigned WebAssemblyTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) { + unsigned Cost = BasicTTIImplBase::getVectorInstrCost(Opcode, Val, Index); + + // SIMD128's insert/extract currently only take constant indices. 
+ if (Index == -1u) + return Cost + 25 * TargetTransformInfo::TCC_Expensive; + + return Cost; +} diff --git a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h index 26dc388cc92..fe99e96eb3b 100644 --- a/gnu/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h +++ b/gnu/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -61,7 +61,15 @@ public: /// \name Vector TTI Implementations /// @{ - // TODO: Implement Vector TTI for WebAssembly + unsigned getNumberOfRegisters(bool Vector); + unsigned getRegisterBitWidth(bool Vector); + unsigned getArithmeticInstrCost( + unsigned Opcode, Type *Ty, + TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, + TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, + TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, + TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None); + unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index); /// @} }; diff --git a/gnu/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt b/gnu/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt index 91b3fff05dc..f07400021dc 100644 --- a/gnu/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt +++ b/gnu/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt @@ -1,253 +1,15 @@ # Tests which are known to fail from the GCC torture test suite. -# Core dump. -920908-1.c -pr38151.c -va-arg-22.c - -# TargetRegisterInfo.h:315: static unsigned int llvm::TargetRegisterInfo::virtReg2Index(unsigned int): Assertion `isVirtualRegister(Reg) && "Not a virtual register"' failed. -struct-ret-1.c -va-arg-11.c -va-arg-21.c -va-arg-24.c -va-arg-trap-1.c - -# WebAssemblyCFGStackify.cpp:211: void SortBlocks(llvm::MachineFunction&, const llvm::MachineLoopInfo&): Assertion `L->contains( MLI.getLoopFor(&*prev(MachineFunction::iterator(&MBB)))) && "Loop isn't contiguous"' failed. -20000815-1.c -20010129-1.c -930628-1.c -980707-1.c - -# WebAssemblyISelLowering.cpp:316: virtual llvm::SDValue llvm::WebAssemblyTargetLowering::LowerCall(llvm::TargetLowering::CallLoweringInfo&, llvm::SmallVectorImpl<llvm::SDValue>&) const: Assertion `!Out.Flags.isByVal() && "byval is not valid for return values"' failed. -20030914-2.c -20040703-1.c -20081117-1.c -920625-1.c -931004-11.c -931004-13.c -980223.c -bitfld-5.c -complex-7.c -pr38969.c -pr51323.c -pr52129.c -pr57130.c - -# These were previously "Cannot select FrameIndex." Now most of them fail -# because they contain call frame pseudos (e.g. call a vararg func), -# frame pointers, or similar. This list will be updated again soon. 
-20000519-1.c -20000706-4.c -20000706-5.c -20000801-2.c -20000801-4.c -20011126-2.c - -20020529-1.c -20021024-1.c - -20030828-1.c -20030914-1.c - +# Computed gotos are not supported (Cannot select BlockAddress/BRIND) 20040302-1.c -20040625-1.c -20040823-1.c - -20041113-1.c - -20041214-1.c - -20050826-2.c - -20071213-1.c - -20080506-2.c -20080519-1.c - -20081103-1.c -20090113-1.c -20090113-2.c -20090113-3.c - -20090623-1.c - -920501-6.c -920501-8.c -920726-1.c -930518-1.c - -931004-10.c -931004-12.c -931004-14.c -931004-2.c -931004-4.c -931004-6.c -931004-8.c - -980205.c -980608-1.c -980709-1.c -980716-1.c -990127-1.c - -991216-2.c - -#cbrt.c -complex-5.c -complex-6.c - -enum-3.c -fprintf-chk-1.c -frame-address.c -loop-15.c -loop-ivopts-2.c -mayalias-3.c - -multi-ix.c - -pr20466-1.c - - -pr28778.c -pr28982b.c - -pr30778.c -pr31448-2.c -pr31448.c - -pr33870-1.c -pr33870.c - -pr38051.c - -pr39100.c - -pr39339.c - -pr43987.c - -pr44575.c - -pr44942.c -pr46309.c -pr47538.c -pr47925.c - -pr49390.c -pr49419.c - -#pr51877.c - -#pr52979-1.c -#pr52979-2.c -pr53645-2.c -pr53645.c - -pr56205.c - -pr56866.c - -pr57876.c -pr58277-1.c - -pr59643.c - -printf-chk-1.c -pta-field-1.c -pta-field-2.c - -stdarg-1.c -stdarg-2.c -stdarg-3.c -stdarg-4.c -strct-stdarg-1.c -strct-varg-1.c - -va-arg-1.c -va-arg-10.c -va-arg-12.c -va-arg-13.c -va-arg-14.c -va-arg-15.c -va-arg-16.c -va-arg-17.c -va-arg-18.c -va-arg-19.c -va-arg-2.c -va-arg-20.c -va-arg-23.c -va-arg-26.c -va-arg-4.c -va-arg-5.c -va-arg-6.c -va-arg-7.c -va-arg-8.c -va-arg-9.c -va-arg-pack-1.c -vfprintf-1.c -vfprintf-chk-1.c -vprintf-1.c -vprintf-chk-1.c - -# Cannot select callseq_end. -20040811-1.c -pr43220.c -vla-dealloc-1.c - -# Cannot select brind. 20071210-1.c 920501-4.c 920501-5.c - -# Cannot select BlockAddress. comp-goto-1.c 980526-1.c 990208-1.c -# WebAssembly hasn't implemented byval arguments. -20000412-3.c -20000419-1.c -20000706-1.c -20000706-2.c -20000707-1.c -20000717-1.c -20000717-5.c -20000808-1.c -20010605-2.c -20011113-1.c -20020215-1.c -20020810-1.c -20021118-1.c -20040707-1.c -20040709-1.c -20040709-2.c -20041201-1.c -20050713-1.c -20070614-1.c -920908-2.c -921112-1.c -921117-1.c -921123-2.c -921204-1.c -930126-1.c -930208-1.c -931004-5.c -931004-9.c -931031-1.c -950607-2.c -960416-1.c -990525-1.c -991118-1.c -bf64-1.c -complex-1.c -complex-2.c -pr15262-2.c -pr20621-1.c -pr23135.c -pr30185.c -pr42248.c - -# unimplemented operation lowering. +# WebAssembly hasn't implemented (will never?) __builtin_return_address 20010122-1.c 20030323-1.c 20030811-1.c @@ -255,7 +17,6 @@ pr17377.c # Error: invalid output constraint '=t' in asm. 990413-2.c -990826-0.c # Error: __builtin_setjmp / __builtin_longjmp is not supported for the current target. built-in-setjmp.c @@ -300,10 +61,9 @@ pr51447.c 20070919-1.c align-nest.c pr41935.c -20050107-1.c -20050119-1.c -20050119-2.c 920302-1.c 920501-3.c 920728-1.c pr28865.c +widechar-2.c +pr41463.c |
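For reference, the alignment-clamping logic added in WebAssemblySetP2AlignOperands.cpp above reduces to taking the log2 of the memory operand's alignment and capping it at the access's natural alignment, since WebAssembly does not accept "supernatural" alignment hints. The following is a minimal standalone sketch of that computation only; the DefaultP2Align parameter is an illustrative stand-in for WebAssembly::GetDefaultP2Align(Opcode) and is not part of the patch.

// Standalone sketch of the p2align clamping performed in
// WebAssemblySetP2AlignOperands::runOnMachineFunction. DefaultP2Align
// stands in for WebAssembly::GetDefaultP2Align(Opcode) (assumption).
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iostream>

// Log2 of a power-of-two alignment, e.g. 8-byte alignment -> 3.
static uint64_t log2PowerOf2(uint64_t X) {
  assert(X && !(X & (X - 1)) && "alignment must be a power of two");
  uint64_t R = 0;
  while (X >>= 1)
    ++R;
  return R;
}

// Clamp the encoded p2align to the access's natural alignment.
static uint64_t computeP2Align(uint64_t AlignBytes, uint64_t DefaultP2Align) {
  return std::min(log2PowerOf2(AlignBytes), DefaultP2Align);
}

int main() {
  // A 4-byte-aligned i32 load keeps its natural p2align of 2 ...
  std::cout << computeP2Align(4, 2) << '\n';  // 2
  // ... a 1-byte-aligned i32 load is marked p2align 0 ...
  std::cout << computeP2Align(1, 2) << '\n';  // 0
  // ... and 16-byte alignment is clamped back to the natural value.
  std::cout << computeP2Align(16, 2) << '\n'; // 2
  return 0;
}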

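Similarly, the frame-index rewrite in WebAssemblyRegisterInfo::eliminateFrameIndex above has three outcomes: fold the frame offset into a load/store's unsigned 32-bit offset immediate when it fits, fold it into a single-use CONST_I32 feeding an ADD_I32, or materialize an explicit i32.const plus i32.add off SP32. The sketch below covers only the first decision; it is an illustrative reconstruction, not code from the patch, and it bails out on negative offsets where the pass itself asserts they are non-negative.

// Standalone sketch of the "can the frame offset be folded into the
// load/store offset immediate?" check made by eliminateFrameIndex.
#include <cstdint>
#include <iostream>
#include <limits>

// Returns true when ExistingImm + FrameOffset still fits the unsigned
// 32-bit offset field of a WebAssembly load/store; the address operand
// can then simply be rewritten to use SP32 as the base.
static bool canFoldFrameOffset(int64_t ExistingImm, int64_t FrameOffset,
                               int64_t &FoldedImm) {
  // The pass asserts these are non-negative; this sketch just bails.
  if (ExistingImm < 0 || FrameOffset < 0)
    return false;
  FoldedImm = ExistingImm + FrameOffset;
  return static_cast<uint64_t>(FoldedImm) <=
         std::numeric_limits<uint32_t>::max();
}

int main() {
  int64_t Folded;
  // Typical case: small offsets fold directly into the instruction.
  if (canFoldFrameOffset(/*ExistingImm=*/8, /*FrameOffset=*/32, Folded))
    std::cout << "fold: offset=" << Folded << " base=SP32\n";
  // Overflow case: fall back to materializing CONST_I32 + ADD_I32 off SP32.
  if (!canFoldFrameOffset(16, int64_t(1) << 33, Folded))
    std::cout << "materialize: CONST_I32 + ADD_I32 off SP32\n";
  return 0;
}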