diff options
| author | 2020-08-03 15:06:44 +0000 | |
|---|---|---|
| committer | 2020-08-03 15:06:44 +0000 | |
| commit | b64793999546ed8adebaeebd9d8345d18db8927d (patch) | |
| tree | 4357c27b561d73b0e089727c6ed659f2ceff5f47 /gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp | |
| parent | Add support for UTF-8 DISPLAY-HINTs with octet length. For now only (diff) | |
| download | wireguard-openbsd-b64793999546ed8adebaeebd9d8345d18db8927d.tar.xz wireguard-openbsd-b64793999546ed8adebaeebd9d8345d18db8927d.zip | |
Remove LLVM 8.0.1 files.
Diffstat (limited to 'gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp')
| -rw-r--r-- | gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp | 1635 |
1 files changed, 0 insertions, 1635 deletions
diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp deleted file mode 100644 index 473de894894..00000000000 --- a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp +++ /dev/null @@ -1,1635 +0,0 @@ -//===- lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp --------------===// -// -// The LLVM Linker -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -/// -/// \file Converts from in-memory normalized mach-o to in-memory Atoms. -/// -/// +------------+ -/// | normalized | -/// +------------+ -/// | -/// | -/// v -/// +-------+ -/// | Atoms | -/// +-------+ - -#include "ArchHandler.h" -#include "Atoms.h" -#include "File.h" -#include "MachONormalizedFile.h" -#include "MachONormalizedFileBinaryUtils.h" -#include "lld/Common/LLVM.h" -#include "lld/Core/Error.h" -#include "llvm/BinaryFormat/Dwarf.h" -#include "llvm/BinaryFormat/MachO.h" -#include "llvm/DebugInfo/DWARF/DWARFFormValue.h" -#include "llvm/Support/DataExtractor.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/LEB128.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm::MachO; -using namespace lld::mach_o::normalized; - -#define DEBUG_TYPE "normalized-file-to-atoms" - -namespace lld { -namespace mach_o { - - -namespace { // anonymous - - -#define ENTRY(seg, sect, type, atomType) \ - {seg, sect, type, DefinedAtom::atomType } - -struct MachORelocatableSectionToAtomType { - StringRef segmentName; - StringRef sectionName; - SectionType sectionType; - DefinedAtom::ContentType atomType; -}; - -const MachORelocatableSectionToAtomType sectsToAtomType[] = { - ENTRY("__TEXT", "__text", S_REGULAR, typeCode), - ENTRY("__TEXT", "__text", S_REGULAR, typeResolver), - ENTRY("__TEXT", "__cstring", S_CSTRING_LITERALS, typeCString), - ENTRY("", "", S_CSTRING_LITERALS, typeCString), - ENTRY("__TEXT", "__ustring", S_REGULAR, typeUTF16String), - ENTRY("__TEXT", "__const", S_REGULAR, typeConstant), - ENTRY("__TEXT", "__const_coal", S_COALESCED, typeConstant), - ENTRY("__TEXT", "__eh_frame", S_COALESCED, typeCFI), - ENTRY("__TEXT", "__eh_frame", S_REGULAR, typeCFI), - ENTRY("__TEXT", "__literal4", S_4BYTE_LITERALS, typeLiteral4), - ENTRY("__TEXT", "__literal8", S_8BYTE_LITERALS, typeLiteral8), - ENTRY("__TEXT", "__literal16", S_16BYTE_LITERALS, typeLiteral16), - ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR, typeLSDA), - ENTRY("__DATA", "__data", S_REGULAR, typeData), - ENTRY("__DATA", "__datacoal_nt", S_COALESCED, typeData), - ENTRY("__DATA", "__const", S_REGULAR, typeConstData), - ENTRY("__DATA", "__cfstring", S_REGULAR, typeCFString), - ENTRY("__DATA", "__mod_init_func", S_MOD_INIT_FUNC_POINTERS, - typeInitializerPtr), - ENTRY("__DATA", "__mod_term_func", S_MOD_TERM_FUNC_POINTERS, - typeTerminatorPtr), - ENTRY("__DATA", "__got", S_NON_LAZY_SYMBOL_POINTERS, - typeGOT), - ENTRY("__DATA", "__bss", S_ZEROFILL, typeZeroFill), - ENTRY("", "", S_NON_LAZY_SYMBOL_POINTERS, - typeGOT), - ENTRY("__DATA", "__interposing", S_INTERPOSING, typeInterposingTuples), - ENTRY("__DATA", "__thread_vars", S_THREAD_LOCAL_VARIABLES, - typeThunkTLV), - ENTRY("__DATA", "__thread_data", S_THREAD_LOCAL_REGULAR, typeTLVInitialData), - ENTRY("__DATA", "__thread_bss", S_THREAD_LOCAL_ZEROFILL, - typeTLVInitialZeroFill), - ENTRY("__DATA", "__objc_imageinfo", S_REGULAR, typeObjCImageInfo), - ENTRY("__DATA", "__objc_catlist", S_REGULAR, typeObjC2CategoryList), - ENTRY("", "", S_INTERPOSING, typeInterposingTuples), - ENTRY("__LD", "__compact_unwind", S_REGULAR, - typeCompactUnwindInfo), - ENTRY("", "", S_REGULAR, typeUnknown) -}; -#undef ENTRY - - -/// Figures out ContentType of a mach-o section. -DefinedAtom::ContentType atomTypeFromSection(const Section §ion, - bool &customSectionName) { - // First look for match of name and type. Empty names in table are wildcards. - customSectionName = false; - for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ; - p->atomType != DefinedAtom::typeUnknown; ++p) { - if (p->sectionType != section.type) - continue; - if (!p->segmentName.equals(section.segmentName) && !p->segmentName.empty()) - continue; - if (!p->sectionName.equals(section.sectionName) && !p->sectionName.empty()) - continue; - customSectionName = p->segmentName.empty() && p->sectionName.empty(); - return p->atomType; - } - // Look for code denoted by section attributes - if (section.attributes & S_ATTR_PURE_INSTRUCTIONS) - return DefinedAtom::typeCode; - - return DefinedAtom::typeUnknown; -} - -enum AtomizeModel { - atomizeAtSymbols, - atomizeFixedSize, - atomizePointerSize, - atomizeUTF8, - atomizeUTF16, - atomizeCFI, - atomizeCU, - atomizeCFString -}; - -/// Returns info on how to atomize a section of the specified ContentType. -void sectionParseInfo(DefinedAtom::ContentType atomType, - unsigned int &sizeMultiple, - DefinedAtom::Scope &scope, - DefinedAtom::Merge &merge, - AtomizeModel &atomizeModel) { - struct ParseInfo { - DefinedAtom::ContentType atomType; - unsigned int sizeMultiple; - DefinedAtom::Scope scope; - DefinedAtom::Merge merge; - AtomizeModel atomizeModel; - }; - - #define ENTRY(type, size, scope, merge, model) \ - {DefinedAtom::type, size, DefinedAtom::scope, DefinedAtom::merge, model } - - static const ParseInfo parseInfo[] = { - ENTRY(typeCode, 1, scopeGlobal, mergeNo, - atomizeAtSymbols), - ENTRY(typeData, 1, scopeGlobal, mergeNo, - atomizeAtSymbols), - ENTRY(typeConstData, 1, scopeGlobal, mergeNo, - atomizeAtSymbols), - ENTRY(typeZeroFill, 1, scopeGlobal, mergeNo, - atomizeAtSymbols), - ENTRY(typeConstant, 1, scopeGlobal, mergeNo, - atomizeAtSymbols), - ENTRY(typeCString, 1, scopeLinkageUnit, mergeByContent, - atomizeUTF8), - ENTRY(typeUTF16String, 1, scopeLinkageUnit, mergeByContent, - atomizeUTF16), - ENTRY(typeCFI, 4, scopeTranslationUnit, mergeNo, - atomizeCFI), - ENTRY(typeLiteral4, 4, scopeLinkageUnit, mergeByContent, - atomizeFixedSize), - ENTRY(typeLiteral8, 8, scopeLinkageUnit, mergeByContent, - atomizeFixedSize), - ENTRY(typeLiteral16, 16, scopeLinkageUnit, mergeByContent, - atomizeFixedSize), - ENTRY(typeCFString, 4, scopeLinkageUnit, mergeByContent, - atomizeCFString), - ENTRY(typeInitializerPtr, 4, scopeTranslationUnit, mergeNo, - atomizePointerSize), - ENTRY(typeTerminatorPtr, 4, scopeTranslationUnit, mergeNo, - atomizePointerSize), - ENTRY(typeCompactUnwindInfo, 4, scopeTranslationUnit, mergeNo, - atomizeCU), - ENTRY(typeGOT, 4, scopeLinkageUnit, mergeByContent, - atomizePointerSize), - ENTRY(typeObjC2CategoryList, 4, scopeTranslationUnit, mergeByContent, - atomizePointerSize), - ENTRY(typeUnknown, 1, scopeGlobal, mergeNo, - atomizeAtSymbols) - }; - #undef ENTRY - const int tableLen = sizeof(parseInfo) / sizeof(ParseInfo); - for (int i=0; i < tableLen; ++i) { - if (parseInfo[i].atomType == atomType) { - sizeMultiple = parseInfo[i].sizeMultiple; - scope = parseInfo[i].scope; - merge = parseInfo[i].merge; - atomizeModel = parseInfo[i].atomizeModel; - return; - } - } - - // Unknown type is atomized by symbols. - sizeMultiple = 1; - scope = DefinedAtom::scopeGlobal; - merge = DefinedAtom::mergeNo; - atomizeModel = atomizeAtSymbols; -} - - -Atom::Scope atomScope(uint8_t scope) { - switch (scope) { - case N_EXT: - return Atom::scopeGlobal; - case N_PEXT: - case N_PEXT | N_EXT: - return Atom::scopeLinkageUnit; - case 0: - return Atom::scopeTranslationUnit; - } - llvm_unreachable("unknown scope value!"); -} - -void appendSymbolsInSection(const std::vector<Symbol> &inSymbols, - uint32_t sectionIndex, - SmallVector<const Symbol *, 64> &outSyms) { - for (const Symbol &sym : inSymbols) { - // Only look at definition symbols. - if ((sym.type & N_TYPE) != N_SECT) - continue; - if (sym.sect != sectionIndex) - continue; - outSyms.push_back(&sym); - } -} - -void atomFromSymbol(DefinedAtom::ContentType atomType, const Section §ion, - MachOFile &file, uint64_t symbolAddr, StringRef symbolName, - uint16_t symbolDescFlags, Atom::Scope symbolScope, - uint64_t nextSymbolAddr, bool scatterable, bool copyRefs) { - // Mach-O symbol table does have size in it. Instead the size is the - // difference between this and the next symbol. - uint64_t size = nextSymbolAddr - symbolAddr; - uint64_t offset = symbolAddr - section.address; - bool noDeadStrip = (symbolDescFlags & N_NO_DEAD_STRIP) || !scatterable; - if (isZeroFillSection(section.type)) { - file.addZeroFillDefinedAtom(symbolName, symbolScope, offset, size, - noDeadStrip, copyRefs, §ion); - } else { - DefinedAtom::Merge merge = (symbolDescFlags & N_WEAK_DEF) - ? DefinedAtom::mergeAsWeak : DefinedAtom::mergeNo; - bool thumb = (symbolDescFlags & N_ARM_THUMB_DEF); - if (atomType == DefinedAtom::typeUnknown) { - // Mach-O needs a segment and section name. Concatentate those two - // with a / separator (e.g. "seg/sect") to fit into the lld model - // of just a section name. - std::string segSectName = section.segmentName.str() - + "/" + section.sectionName.str(); - file.addDefinedAtomInCustomSection(symbolName, symbolScope, atomType, - merge, thumb, noDeadStrip, offset, - size, segSectName, true, §ion); - } else { - if ((atomType == lld::DefinedAtom::typeCode) && - (symbolDescFlags & N_SYMBOL_RESOLVER)) { - atomType = lld::DefinedAtom::typeResolver; - } - file.addDefinedAtom(symbolName, symbolScope, atomType, merge, - offset, size, thumb, noDeadStrip, copyRefs, §ion); - } - } -} - -llvm::Error processSymboledSection(DefinedAtom::ContentType atomType, - const Section §ion, - const NormalizedFile &normalizedFile, - MachOFile &file, bool scatterable, - bool copyRefs) { - // Find section's index. - uint32_t sectIndex = 1; - for (auto § : normalizedFile.sections) { - if (§ == §ion) - break; - ++sectIndex; - } - - // Find all symbols in this section. - SmallVector<const Symbol *, 64> symbols; - appendSymbolsInSection(normalizedFile.globalSymbols, sectIndex, symbols); - appendSymbolsInSection(normalizedFile.localSymbols, sectIndex, symbols); - - // Sort symbols. - std::sort(symbols.begin(), symbols.end(), - [](const Symbol *lhs, const Symbol *rhs) -> bool { - if (lhs == rhs) - return false; - // First by address. - uint64_t lhsAddr = lhs->value; - uint64_t rhsAddr = rhs->value; - if (lhsAddr != rhsAddr) - return lhsAddr < rhsAddr; - // If same address, one is an alias so sort by scope. - Atom::Scope lScope = atomScope(lhs->scope); - Atom::Scope rScope = atomScope(rhs->scope); - if (lScope != rScope) - return lScope < rScope; - // If same address and scope, see if one might be better as - // the alias. - bool lPrivate = (lhs->name.front() == 'l'); - bool rPrivate = (rhs->name.front() == 'l'); - if (lPrivate != rPrivate) - return lPrivate; - // If same address and scope, sort by name. - return lhs->name < rhs->name; - }); - - // Debug logging of symbols. - //for (const Symbol *sym : symbols) - // llvm::errs() << " sym: " - // << llvm::format("0x%08llx ", (uint64_t)sym->value) - // << ", " << sym->name << "\n"; - - // If section has no symbols and no content, there are no atoms. - if (symbols.empty() && section.content.empty()) - return llvm::Error::success(); - - if (symbols.empty()) { - // Section has no symbols, put all content in one anoymous atom. - atomFromSymbol(atomType, section, file, section.address, StringRef(), - 0, Atom::scopeTranslationUnit, - section.address + section.content.size(), - scatterable, copyRefs); - } - else if (symbols.front()->value != section.address) { - // Section has anonymous content before first symbol. - atomFromSymbol(atomType, section, file, section.address, StringRef(), - 0, Atom::scopeTranslationUnit, symbols.front()->value, - scatterable, copyRefs); - } - - const Symbol *lastSym = nullptr; - for (const Symbol *sym : symbols) { - if (lastSym != nullptr) { - // Ignore any assembler added "ltmpNNN" symbol at start of section - // if there is another symbol at the start. - if ((lastSym->value != sym->value) - || lastSym->value != section.address - || !lastSym->name.startswith("ltmp")) { - atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name, - lastSym->desc, atomScope(lastSym->scope), sym->value, - scatterable, copyRefs); - } - } - lastSym = sym; - } - if (lastSym != nullptr) { - atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name, - lastSym->desc, atomScope(lastSym->scope), - section.address + section.content.size(), - scatterable, copyRefs); - } - - // If object built without .subsections_via_symbols, add reference chain. - if (!scatterable) { - MachODefinedAtom *prevAtom = nullptr; - file.eachAtomInSection(section, - [&](MachODefinedAtom *atom, uint64_t offset)->void { - if (prevAtom) - prevAtom->addReference(Reference::KindNamespace::all, - Reference::KindArch::all, - Reference::kindLayoutAfter, 0, atom, 0); - prevAtom = atom; - }); - } - - return llvm::Error::success(); -} - -llvm::Error processSection(DefinedAtom::ContentType atomType, - const Section §ion, - bool customSectionName, - const NormalizedFile &normalizedFile, - MachOFile &file, bool scatterable, - bool copyRefs) { - const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); - const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); - - // Get info on how to atomize section. - unsigned int sizeMultiple; - DefinedAtom::Scope scope; - DefinedAtom::Merge merge; - AtomizeModel atomizeModel; - sectionParseInfo(atomType, sizeMultiple, scope, merge, atomizeModel); - - // Validate section size. - if ((section.content.size() % sizeMultiple) != 0) - return llvm::make_error<GenericError>(Twine("Section ") - + section.segmentName - + "/" + section.sectionName - + " has size (" - + Twine(section.content.size()) - + ") which is not a multiple of " - + Twine(sizeMultiple)); - - if (atomizeModel == atomizeAtSymbols) { - // Break section up into atoms each with a fixed size. - return processSymboledSection(atomType, section, normalizedFile, file, - scatterable, copyRefs); - } else { - unsigned int size; - for (unsigned int offset = 0, e = section.content.size(); offset != e;) { - switch (atomizeModel) { - case atomizeFixedSize: - // Break section up into atoms each with a fixed size. - size = sizeMultiple; - break; - case atomizePointerSize: - // Break section up into atoms each the size of a pointer. - size = is64 ? 8 : 4; - break; - case atomizeUTF8: - // Break section up into zero terminated c-strings. - size = 0; - for (unsigned int i = offset; i < e; ++i) { - if (section.content[i] == 0) { - size = i + 1 - offset; - break; - } - } - break; - case atomizeUTF16: - // Break section up into zero terminated UTF16 strings. - size = 0; - for (unsigned int i = offset; i < e; i += 2) { - if ((section.content[i] == 0) && (section.content[i + 1] == 0)) { - size = i + 2 - offset; - break; - } - } - break; - case atomizeCFI: - // Break section up into dwarf unwind CFIs (FDE or CIE). - size = read32(§ion.content[offset], isBig) + 4; - if (offset+size > section.content.size()) { - return llvm::make_error<GenericError>(Twine("Section ") - + section.segmentName - + "/" + section.sectionName - + " is malformed. Size of CFI " - "starting at offset (" - + Twine(offset) - + ") is past end of section."); - } - break; - case atomizeCU: - // Break section up into compact unwind entries. - size = is64 ? 32 : 20; - break; - case atomizeCFString: - // Break section up into NS/CFString objects. - size = is64 ? 32 : 16; - break; - case atomizeAtSymbols: - break; - } - if (size == 0) { - return llvm::make_error<GenericError>(Twine("Section ") - + section.segmentName - + "/" + section.sectionName - + " is malformed. The last atom " - "is not zero terminated."); - } - if (customSectionName) { - // Mach-O needs a segment and section name. Concatentate those two - // with a / separator (e.g. "seg/sect") to fit into the lld model - // of just a section name. - std::string segSectName = section.segmentName.str() - + "/" + section.sectionName.str(); - file.addDefinedAtomInCustomSection(StringRef(), scope, atomType, - merge, false, false, offset, - size, segSectName, true, §ion); - } else { - file.addDefinedAtom(StringRef(), scope, atomType, merge, offset, size, - false, false, copyRefs, §ion); - } - offset += size; - } - } - return llvm::Error::success(); -} - -const Section* findSectionCoveringAddress(const NormalizedFile &normalizedFile, - uint64_t address) { - for (const Section &s : normalizedFile.sections) { - uint64_t sAddr = s.address; - if ((sAddr <= address) && (address < sAddr+s.content.size())) { - return &s; - } - } - return nullptr; -} - -const MachODefinedAtom * -findAtomCoveringAddress(const NormalizedFile &normalizedFile, MachOFile &file, - uint64_t addr, Reference::Addend &addend) { - const Section *sect = nullptr; - sect = findSectionCoveringAddress(normalizedFile, addr); - if (!sect) - return nullptr; - - uint32_t offsetInTarget; - uint64_t offsetInSect = addr - sect->address; - auto atom = - file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget); - addend = offsetInTarget; - return atom; -} - -// Walks all relocations for a section in a normalized .o file and -// creates corresponding lld::Reference objects. -llvm::Error convertRelocs(const Section §ion, - const NormalizedFile &normalizedFile, - bool scatterable, - MachOFile &file, - ArchHandler &handler) { - // Utility function for ArchHandler to find atom by its address. - auto atomByAddr = [&] (uint32_t sectIndex, uint64_t addr, - const lld::Atom **atom, Reference::Addend *addend) - -> llvm::Error { - if (sectIndex > normalizedFile.sections.size()) - return llvm::make_error<GenericError>(Twine("out of range section " - "index (") + Twine(sectIndex) + ")"); - const Section *sect = nullptr; - if (sectIndex == 0) { - sect = findSectionCoveringAddress(normalizedFile, addr); - if (!sect) - return llvm::make_error<GenericError>(Twine("address (" + Twine(addr) - + ") is not in any section")); - } else { - sect = &normalizedFile.sections[sectIndex-1]; - } - uint32_t offsetInTarget; - uint64_t offsetInSect = addr - sect->address; - *atom = file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget); - *addend = offsetInTarget; - return llvm::Error::success(); - }; - - // Utility function for ArchHandler to find atom by its symbol index. - auto atomBySymbol = [&] (uint32_t symbolIndex, const lld::Atom **result) - -> llvm::Error { - // Find symbol from index. - const Symbol *sym = nullptr; - uint32_t numStabs = normalizedFile.stabsSymbols.size(); - uint32_t numLocal = normalizedFile.localSymbols.size(); - uint32_t numGlobal = normalizedFile.globalSymbols.size(); - uint32_t numUndef = normalizedFile.undefinedSymbols.size(); - assert(symbolIndex >= numStabs && "Searched for stab via atomBySymbol?"); - if (symbolIndex < numStabs+numLocal) { - sym = &normalizedFile.localSymbols[symbolIndex-numStabs]; - } else if (symbolIndex < numStabs+numLocal+numGlobal) { - sym = &normalizedFile.globalSymbols[symbolIndex-numStabs-numLocal]; - } else if (symbolIndex < numStabs+numLocal+numGlobal+numUndef) { - sym = &normalizedFile.undefinedSymbols[symbolIndex-numStabs-numLocal- - numGlobal]; - } else { - return llvm::make_error<GenericError>(Twine("symbol index (") - + Twine(symbolIndex) + ") out of range"); - } - - // Find atom from symbol. - if ((sym->type & N_TYPE) == N_SECT) { - if (sym->sect > normalizedFile.sections.size()) - return llvm::make_error<GenericError>(Twine("symbol section index (") - + Twine(sym->sect) + ") out of range "); - const Section &symSection = normalizedFile.sections[sym->sect-1]; - uint64_t targetOffsetInSect = sym->value - symSection.address; - MachODefinedAtom *target = file.findAtomCoveringAddress(symSection, - targetOffsetInSect); - if (target) { - *result = target; - return llvm::Error::success(); - } - return llvm::make_error<GenericError>("no atom found for defined symbol"); - } else if ((sym->type & N_TYPE) == N_UNDF) { - const lld::Atom *target = file.findUndefAtom(sym->name); - if (target) { - *result = target; - return llvm::Error::success(); - } - return llvm::make_error<GenericError>("no undefined atom found for sym"); - } else { - // Search undefs - return llvm::make_error<GenericError>("no atom found for symbol"); - } - }; - - const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); - // Use old-school iterator so that paired relocations can be grouped. - for (auto it=section.relocations.begin(), e=section.relocations.end(); - it != e; ++it) { - const Relocation &reloc = *it; - // Find atom this relocation is in. - if (reloc.offset > section.content.size()) - return llvm::make_error<GenericError>( - Twine("r_address (") + Twine(reloc.offset) - + ") is larger than section size (" - + Twine(section.content.size()) + ")"); - uint32_t offsetInAtom; - MachODefinedAtom *inAtom = file.findAtomCoveringAddress(section, - reloc.offset, - &offsetInAtom); - assert(inAtom && "r_address in range, should have found atom"); - uint64_t fixupAddress = section.address + reloc.offset; - - const lld::Atom *target = nullptr; - Reference::Addend addend = 0; - Reference::KindValue kind; - if (handler.isPairedReloc(reloc)) { - // Handle paired relocations together. - const Relocation &reloc2 = *++it; - auto relocErr = handler.getPairReferenceInfo( - reloc, reloc2, inAtom, offsetInAtom, fixupAddress, isBig, scatterable, - atomByAddr, atomBySymbol, &kind, &target, &addend); - if (relocErr) { - return handleErrors(std::move(relocErr), - [&](std::unique_ptr<GenericError> GE) { - return llvm::make_error<GenericError>( - Twine("bad relocation (") + GE->getMessage() - + ") in section " - + section.segmentName + "/" + section.sectionName - + " (r1_address=" + Twine::utohexstr(reloc.offset) - + ", r1_type=" + Twine(reloc.type) - + ", r1_extern=" + Twine(reloc.isExtern) - + ", r1_length=" + Twine((int)reloc.length) - + ", r1_pcrel=" + Twine(reloc.pcRel) - + (!reloc.scattered ? (Twine(", r1_symbolnum=") - + Twine(reloc.symbol)) - : (Twine(", r1_scattered=1, r1_value=") - + Twine(reloc.value))) - + ")" - + ", (r2_address=" + Twine::utohexstr(reloc2.offset) - + ", r2_type=" + Twine(reloc2.type) - + ", r2_extern=" + Twine(reloc2.isExtern) - + ", r2_length=" + Twine((int)reloc2.length) - + ", r2_pcrel=" + Twine(reloc2.pcRel) - + (!reloc2.scattered ? (Twine(", r2_symbolnum=") - + Twine(reloc2.symbol)) - : (Twine(", r2_scattered=1, r2_value=") - + Twine(reloc2.value))) - + ")" ); - }); - } - } - else { - // Use ArchHandler to convert relocation record into information - // needed to instantiate an lld::Reference object. - auto relocErr = handler.getReferenceInfo( - reloc, inAtom, offsetInAtom, fixupAddress, isBig, atomByAddr, - atomBySymbol, &kind, &target, &addend); - if (relocErr) { - return handleErrors(std::move(relocErr), - [&](std::unique_ptr<GenericError> GE) { - return llvm::make_error<GenericError>( - Twine("bad relocation (") + GE->getMessage() - + ") in section " - + section.segmentName + "/" + section.sectionName - + " (r_address=" + Twine::utohexstr(reloc.offset) - + ", r_type=" + Twine(reloc.type) - + ", r_extern=" + Twine(reloc.isExtern) - + ", r_length=" + Twine((int)reloc.length) - + ", r_pcrel=" + Twine(reloc.pcRel) - + (!reloc.scattered ? (Twine(", r_symbolnum=") + Twine(reloc.symbol)) - : (Twine(", r_scattered=1, r_value=") - + Twine(reloc.value))) - + ")" ); - }); - } - } - // Instantiate an lld::Reference object and add to its atom. - inAtom->addReference(Reference::KindNamespace::mach_o, - handler.kindArch(), - kind, offsetInAtom, target, addend); - } - - return llvm::Error::success(); -} - -bool isDebugInfoSection(const Section §ion) { - if ((section.attributes & S_ATTR_DEBUG) == 0) - return false; - return section.segmentName.equals("__DWARF"); -} - -static const Atom* findDefinedAtomByName(MachOFile &file, Twine name) { - std::string strName = name.str(); - for (auto *atom : file.defined()) - if (atom->name() == strName) - return atom; - return nullptr; -} - -static StringRef copyDebugString(StringRef str, BumpPtrAllocator &alloc) { - char *strCopy = alloc.Allocate<char>(str.size() + 1); - memcpy(strCopy, str.data(), str.size()); - strCopy[str.size()] = '\0'; - return strCopy; -} - -llvm::Error parseStabs(MachOFile &file, - const NormalizedFile &normalizedFile, - bool copyRefs) { - - if (normalizedFile.stabsSymbols.empty()) - return llvm::Error::success(); - - // FIXME: Kill this off when we can move to sane yaml parsing. - std::unique_ptr<BumpPtrAllocator> allocator; - if (copyRefs) - allocator = llvm::make_unique<BumpPtrAllocator>(); - - enum { start, inBeginEnd } state = start; - - const Atom *currentAtom = nullptr; - uint64_t currentAtomAddress = 0; - StabsDebugInfo::StabsList stabsList; - for (const auto &stabSym : normalizedFile.stabsSymbols) { - Stab stab(nullptr, stabSym.type, stabSym.sect, stabSym.desc, - stabSym.value, stabSym.name); - switch (state) { - case start: - switch (static_cast<StabType>(stabSym.type)) { - case N_BNSYM: - state = inBeginEnd; - currentAtomAddress = stabSym.value; - Reference::Addend addend; - currentAtom = findAtomCoveringAddress(normalizedFile, file, - currentAtomAddress, addend); - if (addend != 0) - return llvm::make_error<GenericError>( - "Non-zero addend for BNSYM '" + stabSym.name + "' in " + - file.path()); - if (currentAtom) - stab.atom = currentAtom; - else { - // FIXME: ld64 just issues a warning here - should we match that? - return llvm::make_error<GenericError>( - "can't find atom for stabs BNSYM at " + - Twine::utohexstr(stabSym.value) + " in " + file.path()); - } - break; - case N_SO: - case N_OSO: - // Not associated with an atom, just copy. - if (copyRefs) - stab.str = copyDebugString(stabSym.name, *allocator); - else - stab.str = stabSym.name; - break; - case N_GSYM: { - auto colonIdx = stabSym.name.find(':'); - if (colonIdx != StringRef::npos) { - StringRef name = stabSym.name.substr(0, colonIdx); - currentAtom = findDefinedAtomByName(file, "_" + name); - stab.atom = currentAtom; - if (copyRefs) - stab.str = copyDebugString(stabSym.name, *allocator); - else - stab.str = stabSym.name; - } else { - currentAtom = findDefinedAtomByName(file, stabSym.name); - stab.atom = currentAtom; - if (copyRefs) - stab.str = copyDebugString(stabSym.name, *allocator); - else - stab.str = stabSym.name; - } - if (stab.atom == nullptr) - return llvm::make_error<GenericError>( - "can't find atom for N_GSYM stabs" + stabSym.name + - " in " + file.path()); - break; - } - case N_FUN: - return llvm::make_error<GenericError>( - "old-style N_FUN stab '" + stabSym.name + "' unsupported"); - default: - return llvm::make_error<GenericError>( - "unrecognized stab symbol '" + stabSym.name + "'"); - } - break; - case inBeginEnd: - stab.atom = currentAtom; - switch (static_cast<StabType>(stabSym.type)) { - case N_ENSYM: - state = start; - currentAtom = nullptr; - break; - case N_FUN: - // Just copy the string. - if (copyRefs) - stab.str = copyDebugString(stabSym.name, *allocator); - else - stab.str = stabSym.name; - break; - default: - return llvm::make_error<GenericError>( - "unrecognized stab symbol '" + stabSym.name + "'"); - } - } - llvm::dbgs() << "Adding to stabsList: " << stab << "\n"; - stabsList.push_back(stab); - } - - file.setDebugInfo(llvm::make_unique<StabsDebugInfo>(std::move(stabsList))); - - // FIXME: Kill this off when we fix YAML memory ownership. - file.debugInfo()->setAllocator(std::move(allocator)); - - return llvm::Error::success(); -} - -static llvm::DataExtractor -dataExtractorFromSection(const NormalizedFile &normalizedFile, - const Section &S) { - const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); - const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); - StringRef SecData(reinterpret_cast<const char*>(S.content.data()), - S.content.size()); - return llvm::DataExtractor(SecData, !isBig, is64 ? 8 : 4); -} - -// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE -// inspection" code if possible. -static uint32_t getCUAbbrevOffset(llvm::DataExtractor abbrevData, - uint64_t abbrCode) { - uint64_t curCode; - uint32_t offset = 0; - while ((curCode = abbrevData.getULEB128(&offset)) != abbrCode) { - // Tag - abbrevData.getULEB128(&offset); - // DW_CHILDREN - abbrevData.getU8(&offset); - // Attributes - while (abbrevData.getULEB128(&offset) | abbrevData.getULEB128(&offset)) - ; - } - return offset; -} - -// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE -// inspection" code if possible. -static Expected<const char *> -getIndexedString(const NormalizedFile &normalizedFile, - llvm::dwarf::Form form, llvm::DataExtractor infoData, - uint32_t &infoOffset, const Section &stringsSection) { - if (form == llvm::dwarf::DW_FORM_string) - return infoData.getCStr(&infoOffset); - if (form != llvm::dwarf::DW_FORM_strp) - return llvm::make_error<GenericError>( - "string field encoded without DW_FORM_strp"); - uint32_t stringOffset = infoData.getU32(&infoOffset); - llvm::DataExtractor stringsData = - dataExtractorFromSection(normalizedFile, stringsSection); - return stringsData.getCStr(&stringOffset); -} - -// FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE -// inspection" code if possible. -static llvm::Expected<TranslationUnitSource> -readCompUnit(const NormalizedFile &normalizedFile, - const Section &info, - const Section &abbrev, - const Section &strings, - StringRef path) { - // FIXME: Cribbed from llvm-dwp -- should share "lightweight CU DIE - // inspection" code if possible. - uint32_t offset = 0; - llvm::dwarf::DwarfFormat Format = llvm::dwarf::DwarfFormat::DWARF32; - auto infoData = dataExtractorFromSection(normalizedFile, info); - uint32_t length = infoData.getU32(&offset); - if (length == 0xffffffff) { - Format = llvm::dwarf::DwarfFormat::DWARF64; - infoData.getU64(&offset); - } - else if (length > 0xffffff00) - return llvm::make_error<GenericError>("Malformed DWARF in " + path); - - uint16_t version = infoData.getU16(&offset); - - if (version < 2 || version > 4) - return llvm::make_error<GenericError>("Unsupported DWARF version in " + - path); - - infoData.getU32(&offset); // Abbrev offset (should be zero) - uint8_t addrSize = infoData.getU8(&offset); - - uint32_t abbrCode = infoData.getULEB128(&offset); - auto abbrevData = dataExtractorFromSection(normalizedFile, abbrev); - uint32_t abbrevOffset = getCUAbbrevOffset(abbrevData, abbrCode); - uint64_t tag = abbrevData.getULEB128(&abbrevOffset); - if (tag != llvm::dwarf::DW_TAG_compile_unit) - return llvm::make_error<GenericError>("top level DIE is not a compile unit"); - // DW_CHILDREN - abbrevData.getU8(&abbrevOffset); - uint32_t name; - llvm::dwarf::Form form; - llvm::dwarf::FormParams formParams = {version, addrSize, Format}; - TranslationUnitSource tu; - while ((name = abbrevData.getULEB128(&abbrevOffset)) | - (form = static_cast<llvm::dwarf::Form>( - abbrevData.getULEB128(&abbrevOffset))) && - (name != 0 || form != 0)) { - switch (name) { - case llvm::dwarf::DW_AT_name: { - if (auto eName = getIndexedString(normalizedFile, form, infoData, offset, - strings)) - tu.name = *eName; - else - return eName.takeError(); - break; - } - case llvm::dwarf::DW_AT_comp_dir: { - if (auto eName = getIndexedString(normalizedFile, form, infoData, offset, - strings)) - tu.path = *eName; - else - return eName.takeError(); - break; - } - default: - llvm::DWARFFormValue::skipValue(form, infoData, &offset, formParams); - } - } - return tu; -} - -llvm::Error parseDebugInfo(MachOFile &file, - const NormalizedFile &normalizedFile, bool copyRefs) { - - // Find the interesting debug info sections. - const Section *debugInfo = nullptr; - const Section *debugAbbrev = nullptr; - const Section *debugStrings = nullptr; - - for (auto &s : normalizedFile.sections) { - if (s.segmentName == "__DWARF") { - if (s.sectionName == "__debug_info") - debugInfo = &s; - else if (s.sectionName == "__debug_abbrev") - debugAbbrev = &s; - else if (s.sectionName == "__debug_str") - debugStrings = &s; - } - } - - if (!debugInfo) - return parseStabs(file, normalizedFile, copyRefs); - - if (debugInfo->content.size() == 0) - return llvm::Error::success(); - - if (debugInfo->content.size() < 12) - return llvm::make_error<GenericError>("Malformed __debug_info section in " + - file.path() + ": too small"); - - if (!debugAbbrev) - return llvm::make_error<GenericError>("Missing __dwarf_abbrev section in " + - file.path()); - - if (auto tuOrErr = readCompUnit(normalizedFile, *debugInfo, *debugAbbrev, - *debugStrings, file.path())) { - // FIXME: Kill of allocator and code under 'copyRefs' when we fix YAML - // memory ownership. - std::unique_ptr<BumpPtrAllocator> allocator; - if (copyRefs) { - allocator = llvm::make_unique<BumpPtrAllocator>(); - tuOrErr->name = copyDebugString(tuOrErr->name, *allocator); - tuOrErr->path = copyDebugString(tuOrErr->path, *allocator); - } - file.setDebugInfo(llvm::make_unique<DwarfDebugInfo>(std::move(*tuOrErr))); - if (copyRefs) - file.debugInfo()->setAllocator(std::move(allocator)); - } else - return tuOrErr.takeError(); - - return llvm::Error::success(); -} - -static int64_t readSPtr(bool is64, bool isBig, const uint8_t *addr) { - if (is64) - return read64(addr, isBig); - - int32_t res = read32(addr, isBig); - return res; -} - -/// --- Augmentation String Processing --- - -struct CIEInfo { - bool _augmentationDataPresent = false; - bool _mayHaveEH = false; - uint32_t _offsetOfLSDA = ~0U; - uint32_t _offsetOfPersonality = ~0U; - uint32_t _offsetOfFDEPointerEncoding = ~0U; - uint32_t _augmentationDataLength = ~0U; -}; - -typedef llvm::DenseMap<const MachODefinedAtom*, CIEInfo> CIEInfoMap; - -static llvm::Error processAugmentationString(const uint8_t *augStr, - CIEInfo &cieInfo, - unsigned &len) { - - if (augStr[0] == '\0') { - len = 1; - return llvm::Error::success(); - } - - if (augStr[0] != 'z') - return llvm::make_error<GenericError>("expected 'z' at start of " - "augmentation string"); - - cieInfo._augmentationDataPresent = true; - uint64_t idx = 1; - - uint32_t offsetInAugmentationData = 0; - while (augStr[idx] != '\0') { - if (augStr[idx] == 'L') { - cieInfo._offsetOfLSDA = offsetInAugmentationData; - // This adds a single byte to the augmentation data. - ++offsetInAugmentationData; - ++idx; - continue; - } - if (augStr[idx] == 'P') { - cieInfo._offsetOfPersonality = offsetInAugmentationData; - // This adds a single byte to the augmentation data for the encoding, - // then a number of bytes for the pointer data. - // FIXME: We are assuming 4 is correct here for the pointer size as we - // always currently use delta32ToGOT. - offsetInAugmentationData += 5; - ++idx; - continue; - } - if (augStr[idx] == 'R') { - cieInfo._offsetOfFDEPointerEncoding = offsetInAugmentationData; - // This adds a single byte to the augmentation data. - ++offsetInAugmentationData; - ++idx; - continue; - } - if (augStr[idx] == 'e') { - if (augStr[idx + 1] != 'h') - return llvm::make_error<GenericError>("expected 'eh' in " - "augmentation string"); - cieInfo._mayHaveEH = true; - idx += 2; - continue; - } - ++idx; - } - - cieInfo._augmentationDataLength = offsetInAugmentationData; - - len = idx + 1; - return llvm::Error::success(); -} - -static llvm::Error processCIE(const NormalizedFile &normalizedFile, - MachOFile &file, - mach_o::ArchHandler &handler, - const Section *ehFrameSection, - MachODefinedAtom *atom, - uint64_t offset, - CIEInfoMap &cieInfos) { - const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); - const uint8_t *frameData = atom->rawContent().data(); - - CIEInfo cieInfo; - - uint32_t size = read32(frameData, isBig); - uint64_t cieIDField = size == 0xffffffffU - ? sizeof(uint32_t) + sizeof(uint64_t) - : sizeof(uint32_t); - uint64_t versionField = cieIDField + sizeof(uint32_t); - uint64_t augmentationStringField = versionField + sizeof(uint8_t); - - unsigned augmentationStringLength = 0; - if (auto err = processAugmentationString(frameData + augmentationStringField, - cieInfo, augmentationStringLength)) - return err; - - if (cieInfo._offsetOfPersonality != ~0U) { - // If we have augmentation data for the personality function, then we may - // need to implicitly generate its relocation. - - // Parse the EH Data field which is pointer sized. - uint64_t EHDataField = augmentationStringField + augmentationStringLength; - const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); - unsigned EHDataFieldSize = (cieInfo._mayHaveEH ? (is64 ? 8 : 4) : 0); - - // Parse Code Align Factor which is a ULEB128. - uint64_t CodeAlignField = EHDataField + EHDataFieldSize; - unsigned lengthFieldSize = 0; - llvm::decodeULEB128(frameData + CodeAlignField, &lengthFieldSize); - - // Parse Data Align Factor which is a SLEB128. - uint64_t DataAlignField = CodeAlignField + lengthFieldSize; - llvm::decodeSLEB128(frameData + DataAlignField, &lengthFieldSize); - - // Parse Return Address Register which is a byte. - uint64_t ReturnAddressField = DataAlignField + lengthFieldSize; - - // Parse the augmentation length which is a ULEB128. - uint64_t AugmentationLengthField = ReturnAddressField + 1; - uint64_t AugmentationLength = - llvm::decodeULEB128(frameData + AugmentationLengthField, - &lengthFieldSize); - - if (AugmentationLength != cieInfo._augmentationDataLength) - return llvm::make_error<GenericError>("CIE augmentation data length " - "mismatch"); - - // Get the start address of the augmentation data. - uint64_t AugmentationDataField = AugmentationLengthField + lengthFieldSize; - - // Parse the personality function from the augmentation data. - uint64_t PersonalityField = - AugmentationDataField + cieInfo._offsetOfPersonality; - - // Parse the personality encoding. - // FIXME: Verify that this is a 32-bit pcrel offset. - uint64_t PersonalityFunctionField = PersonalityField + 1; - - if (atom->begin() != atom->end()) { - // If we have an explicit relocation, then make sure it matches this - // offset as this is where we'd expect it to be applied to. - DefinedAtom::reference_iterator CurrentRef = atom->begin(); - if (CurrentRef->offsetInAtom() != PersonalityFunctionField) - return llvm::make_error<GenericError>("CIE personality reloc at " - "wrong offset"); - - if (++CurrentRef != atom->end()) - return llvm::make_error<GenericError>("CIE contains too many relocs"); - } else { - // Implicitly generate the personality function reloc. It's assumed to - // be a delta32 offset to a GOT entry. - // FIXME: Parse the encoding and check this. - int32_t funcDelta = read32(frameData + PersonalityFunctionField, isBig); - uint64_t funcAddress = ehFrameSection->address + offset + - PersonalityFunctionField; - funcAddress += funcDelta; - - const MachODefinedAtom *func = nullptr; - Reference::Addend addend; - func = findAtomCoveringAddress(normalizedFile, file, funcAddress, - addend); - atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(), - handler.unwindRefToPersonalityFunctionKind(), - PersonalityFunctionField, func, addend); - } - } else if (atom->begin() != atom->end()) { - // Otherwise, we expect there to be no relocations in this atom as the only - // relocation would have been to the personality function. - return llvm::make_error<GenericError>("unexpected relocation in CIE"); - } - - - cieInfos[atom] = std::move(cieInfo); - - return llvm::Error::success(); -} - -static llvm::Error processFDE(const NormalizedFile &normalizedFile, - MachOFile &file, - mach_o::ArchHandler &handler, - const Section *ehFrameSection, - MachODefinedAtom *atom, - uint64_t offset, - const CIEInfoMap &cieInfos) { - - const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); - const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); - - // Compiler wasn't lazy and actually told us what it meant. - // Unfortunately, the compiler may not have generated references for all of - // [cie, func, lsda] and so we still need to parse the FDE and add references - // for any the compiler didn't generate. - if (atom->begin() != atom->end()) - atom->sortReferences(); - - DefinedAtom::reference_iterator CurrentRef = atom->begin(); - - // This helper returns the reference (if one exists) at the offset we are - // currently processing. It automatically increments the ref iterator if we - // do return a ref, and throws an error if we pass over a ref without - // comsuming it. - auto currentRefGetter = [&CurrentRef, - &atom](uint64_t Offset)->const Reference* { - // If there are no more refs found, then we are done. - if (CurrentRef == atom->end()) - return nullptr; - - const Reference *Ref = *CurrentRef; - - // If we haven't reached the offset for this reference, then return that - // we don't yet have a reference to process. - if (Offset < Ref->offsetInAtom()) - return nullptr; - - // If the offset is equal, then we want to process this ref. - if (Offset == Ref->offsetInAtom()) { - ++CurrentRef; - return Ref; - } - - // The current ref is at an offset which is earlier than the current - // offset, then we failed to consume it when we should have. In this case - // throw an error. - llvm::report_fatal_error("Skipped reference when processing FDE"); - }; - - // Helper to either get the reference at this current location, and verify - // that it is of the expected type, or add a reference of that type. - // Returns the reference target. - auto verifyOrAddReference = [&](uint64_t targetAddress, - Reference::KindValue refKind, - uint64_t refAddress, - bool allowsAddend)->const Atom* { - if (auto *ref = currentRefGetter(refAddress)) { - // The compiler already emitted a relocation for the CIE ref. This should - // have been converted to the correct type of reference in - // get[Pair]ReferenceInfo(). - assert(ref->kindValue() == refKind && - "Incorrect EHFrame reference kind"); - return ref->target(); - } - Reference::Addend addend; - auto *target = findAtomCoveringAddress(normalizedFile, file, - targetAddress, addend); - atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(), - refKind, refAddress, target, addend); - - if (!allowsAddend) - assert(!addend && "EHFrame reference cannot have addend"); - return target; - }; - - const uint8_t *startFrameData = atom->rawContent().data(); - const uint8_t *frameData = startFrameData; - - uint32_t size = read32(frameData, isBig); - uint64_t cieFieldInFDE = size == 0xffffffffU - ? sizeof(uint32_t) + sizeof(uint64_t) - : sizeof(uint32_t); - - // Linker needs to fixup a reference from the FDE to its parent CIE (a - // 32-bit byte offset backwards in the __eh_frame section). - uint32_t cieDelta = read32(frameData + cieFieldInFDE, isBig); - uint64_t cieAddress = ehFrameSection->address + offset + cieFieldInFDE; - cieAddress -= cieDelta; - - auto *cieRefTarget = verifyOrAddReference(cieAddress, - handler.unwindRefToCIEKind(), - cieFieldInFDE, false); - const MachODefinedAtom *cie = dyn_cast<MachODefinedAtom>(cieRefTarget); - assert(cie && cie->contentType() == DefinedAtom::typeCFI && - "FDE's CIE field does not point at the start of a CIE."); - - const CIEInfo &cieInfo = cieInfos.find(cie)->second; - - // Linker needs to fixup reference from the FDE to the function it's - // describing. FIXME: there are actually different ways to do this, and the - // particular method used is specified in the CIE's augmentation fields - // (hopefully) - uint64_t rangeFieldInFDE = cieFieldInFDE + sizeof(uint32_t); - - int64_t functionFromFDE = readSPtr(is64, isBig, - frameData + rangeFieldInFDE); - uint64_t rangeStart = ehFrameSection->address + offset + rangeFieldInFDE; - rangeStart += functionFromFDE; - - verifyOrAddReference(rangeStart, - handler.unwindRefToFunctionKind(), - rangeFieldInFDE, true); - - // Handle the augmentation data if there is any. - if (cieInfo._augmentationDataPresent) { - // First process the augmentation data length field. - uint64_t augmentationDataLengthFieldInFDE = - rangeFieldInFDE + 2 * (is64 ? sizeof(uint64_t) : sizeof(uint32_t)); - unsigned lengthFieldSize = 0; - uint64_t augmentationDataLength = - llvm::decodeULEB128(frameData + augmentationDataLengthFieldInFDE, - &lengthFieldSize); - - if (cieInfo._offsetOfLSDA != ~0U && augmentationDataLength > 0) { - - // Look at the augmentation data field. - uint64_t augmentationDataFieldInFDE = - augmentationDataLengthFieldInFDE + lengthFieldSize; - - int64_t lsdaFromFDE = readSPtr(is64, isBig, - frameData + augmentationDataFieldInFDE); - uint64_t lsdaStart = - ehFrameSection->address + offset + augmentationDataFieldInFDE + - lsdaFromFDE; - - verifyOrAddReference(lsdaStart, - handler.unwindRefToFunctionKind(), - augmentationDataFieldInFDE, true); - } - } - - return llvm::Error::success(); -} - -llvm::Error addEHFrameReferences(const NormalizedFile &normalizedFile, - MachOFile &file, - mach_o::ArchHandler &handler) { - - const Section *ehFrameSection = nullptr; - for (auto §ion : normalizedFile.sections) - if (section.segmentName == "__TEXT" && - section.sectionName == "__eh_frame") { - ehFrameSection = §ion; - break; - } - - // No __eh_frame so nothing to do. - if (!ehFrameSection) - return llvm::Error::success(); - - llvm::Error ehFrameErr = llvm::Error::success(); - CIEInfoMap cieInfos; - - file.eachAtomInSection(*ehFrameSection, - [&](MachODefinedAtom *atom, uint64_t offset) -> void { - assert(atom->contentType() == DefinedAtom::typeCFI); - - // Bail out if we've encountered an error. - if (ehFrameErr) - return; - - const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); - if (ArchHandler::isDwarfCIE(isBig, atom)) - ehFrameErr = processCIE(normalizedFile, file, handler, ehFrameSection, - atom, offset, cieInfos); - else - ehFrameErr = processFDE(normalizedFile, file, handler, ehFrameSection, - atom, offset, cieInfos); - }); - - return ehFrameErr; -} - -llvm::Error parseObjCImageInfo(const Section §, - const NormalizedFile &normalizedFile, - MachOFile &file) { - - // struct objc_image_info { - // uint32_t version; // initially 0 - // uint32_t flags; - // }; - - ArrayRef<uint8_t> content = sect.content; - if (content.size() != 8) - return llvm::make_error<GenericError>(sect.segmentName + "/" + - sect.sectionName + - " in file " + file.path() + - " should be 8 bytes in size"); - - const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); - uint32_t version = read32(content.data(), isBig); - if (version) - return llvm::make_error<GenericError>(sect.segmentName + "/" + - sect.sectionName + - " in file " + file.path() + - " should have version=0"); - - uint32_t flags = read32(content.data() + 4, isBig); - if (flags & (MachOLinkingContext::objc_supports_gc | - MachOLinkingContext::objc_gc_only)) - return llvm::make_error<GenericError>(sect.segmentName + "/" + - sect.sectionName + - " in file " + file.path() + - " uses GC. This is not supported"); - - if (flags & MachOLinkingContext::objc_retainReleaseForSimulator) - file.setObjcConstraint(MachOLinkingContext::objc_retainReleaseForSimulator); - else - file.setObjcConstraint(MachOLinkingContext::objc_retainRelease); - - file.setSwiftVersion((flags >> 8) & 0xFF); - - return llvm::Error::success(); -} - -/// Converts normalized mach-o file into an lld::File and lld::Atoms. -llvm::Expected<std::unique_ptr<lld::File>> -objectToAtoms(const NormalizedFile &normalizedFile, StringRef path, - bool copyRefs) { - std::unique_ptr<MachOFile> file(new MachOFile(path)); - if (auto ec = normalizedObjectToAtoms(file.get(), normalizedFile, copyRefs)) - return std::move(ec); - return std::unique_ptr<File>(std::move(file)); -} - -llvm::Expected<std::unique_ptr<lld::File>> -dylibToAtoms(const NormalizedFile &normalizedFile, StringRef path, - bool copyRefs) { - // Instantiate SharedLibraryFile object. - std::unique_ptr<MachODylibFile> file(new MachODylibFile(path)); - if (auto ec = normalizedDylibToAtoms(file.get(), normalizedFile, copyRefs)) - return std::move(ec); - return std::unique_ptr<File>(std::move(file)); -} - -} // anonymous namespace - -namespace normalized { - -static bool isObjCImageInfo(const Section §) { - return (sect.segmentName == "__OBJC" && sect.sectionName == "__image_info") || - (sect.segmentName == "__DATA" && sect.sectionName == "__objc_imageinfo"); -} - -llvm::Error -normalizedObjectToAtoms(MachOFile *file, - const NormalizedFile &normalizedFile, - bool copyRefs) { - LLVM_DEBUG(llvm::dbgs() << "******** Normalizing file to atoms: " - << file->path() << "\n"); - bool scatterable = ((normalizedFile.flags & MH_SUBSECTIONS_VIA_SYMBOLS) != 0); - - // Create atoms from each section. - for (auto § : normalizedFile.sections) { - - // If this is a debug-info section parse it specially. - if (isDebugInfoSection(sect)) - continue; - - // If the file contains an objc_image_info struct, then we should parse the - // ObjC flags and Swift version. - if (isObjCImageInfo(sect)) { - if (auto ec = parseObjCImageInfo(sect, normalizedFile, *file)) - return ec; - // We then skip adding atoms for this section as we use the ObjCPass to - // re-emit this data after it has been aggregated for all files. - continue; - } - - bool customSectionName; - DefinedAtom::ContentType atomType = atomTypeFromSection(sect, - customSectionName); - if (auto ec = processSection(atomType, sect, customSectionName, - normalizedFile, *file, scatterable, copyRefs)) - return ec; - } - // Create atoms from undefined symbols. - for (auto &sym : normalizedFile.undefinedSymbols) { - // Undefinded symbols with n_value != 0 are actually tentative definitions. - if (sym.value == Hex64(0)) { - file->addUndefinedAtom(sym.name, copyRefs); - } else { - file->addTentativeDefAtom(sym.name, atomScope(sym.scope), sym.value, - DefinedAtom::Alignment(1 << (sym.desc >> 8)), - copyRefs); - } - } - - // Convert mach-o relocations to References - std::unique_ptr<mach_o::ArchHandler> handler - = ArchHandler::create(normalizedFile.arch); - for (auto § : normalizedFile.sections) { - if (isDebugInfoSection(sect)) - continue; - if (llvm::Error ec = convertRelocs(sect, normalizedFile, scatterable, - *file, *handler)) - return ec; - } - - // Add additional arch-specific References - file->eachDefinedAtom([&](MachODefinedAtom* atom) -> void { - handler->addAdditionalReferences(*atom); - }); - - // Each __eh_frame section needs references to both __text (the function we're - // providing unwind info for) and itself (FDE -> CIE). These aren't - // represented in the relocations on some architectures, so we have to add - // them back in manually there. - if (auto ec = addEHFrameReferences(normalizedFile, *file, *handler)) - return ec; - - // Process mach-o data-in-code regions array. That information is encoded in - // atoms as References at each transition point. - unsigned nextIndex = 0; - for (const DataInCode &entry : normalizedFile.dataInCode) { - ++nextIndex; - const Section* s = findSectionCoveringAddress(normalizedFile, entry.offset); - if (!s) { - return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE address (" - + Twine(entry.offset) - + ") is not in any section")); - } - uint64_t offsetInSect = entry.offset - s->address; - uint32_t offsetInAtom; - MachODefinedAtom *atom = file->findAtomCoveringAddress(*s, offsetInSect, - &offsetInAtom); - if (offsetInAtom + entry.length > atom->size()) { - return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE entry " - "(offset=" - + Twine(entry.offset) - + ", length=" - + Twine(entry.length) - + ") crosses atom boundary.")); - } - // Add reference that marks start of data-in-code. - atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(), - handler->dataInCodeTransitionStart(*atom), - offsetInAtom, atom, entry.kind); - - // Peek at next entry, if it starts where this one ends, skip ending ref. - if (nextIndex < normalizedFile.dataInCode.size()) { - const DataInCode &nextEntry = normalizedFile.dataInCode[nextIndex]; - if (nextEntry.offset == (entry.offset + entry.length)) - continue; - } - - // If data goes to end of function, skip ending ref. - if ((offsetInAtom + entry.length) == atom->size()) - continue; - - // Add reference that marks end of data-in-code. - atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(), - handler->dataInCodeTransitionEnd(*atom), - offsetInAtom+entry.length, atom, 0); - } - - // Cache some attributes on the file for use later. - file->setFlags(normalizedFile.flags); - file->setArch(normalizedFile.arch); - file->setOS(normalizedFile.os); - file->setMinVersion(normalizedFile.minOSverson); - file->setMinVersionLoadCommandKind(normalizedFile.minOSVersionKind); - - // Sort references in each atom to their canonical order. - for (const DefinedAtom* defAtom : file->defined()) { - reinterpret_cast<const SimpleDefinedAtom*>(defAtom)->sortReferences(); - } - - if (auto err = parseDebugInfo(*file, normalizedFile, copyRefs)) - return err; - - return llvm::Error::success(); -} - -llvm::Error -normalizedDylibToAtoms(MachODylibFile *file, - const NormalizedFile &normalizedFile, - bool copyRefs) { - file->setInstallName(normalizedFile.installName); - file->setCompatVersion(normalizedFile.compatVersion); - file->setCurrentVersion(normalizedFile.currentVersion); - - // Tell MachODylibFile object about all symbols it exports. - if (!normalizedFile.exportInfo.empty()) { - // If exports trie exists, use it instead of traditional symbol table. - for (const Export &exp : normalizedFile.exportInfo) { - bool weakDef = (exp.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION); - // StringRefs from export iterator are ephemeral, so force copy. - file->addExportedSymbol(exp.name, weakDef, true); - } - } else { - for (auto &sym : normalizedFile.globalSymbols) { - assert((sym.scope & N_EXT) && "only expect external symbols here"); - bool weakDef = (sym.desc & N_WEAK_DEF); - file->addExportedSymbol(sym.name, weakDef, copyRefs); - } - } - // Tell MachODylibFile object about all dylibs it re-exports. - for (const DependentDylib &dep : normalizedFile.dependentDylibs) { - if (dep.kind == llvm::MachO::LC_REEXPORT_DYLIB) - file->addReExportedDylib(dep.path); - } - return llvm::Error::success(); -} - -void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType, - StringRef &segmentName, - StringRef §ionName, - SectionType §ionType, - SectionAttr §ionAttrs, - bool &relocsToDefinedCanBeImplicit) { - - for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ; - p->atomType != DefinedAtom::typeUnknown; ++p) { - if (p->atomType != atomType) - continue; - // Wild carded entries are ignored for reverse lookups. - if (p->segmentName.empty() || p->sectionName.empty()) - continue; - segmentName = p->segmentName; - sectionName = p->sectionName; - sectionType = p->sectionType; - sectionAttrs = 0; - relocsToDefinedCanBeImplicit = false; - if (atomType == DefinedAtom::typeCode) - sectionAttrs = S_ATTR_PURE_INSTRUCTIONS; - if (atomType == DefinedAtom::typeCFI) - relocsToDefinedCanBeImplicit = true; - return; - } - llvm_unreachable("content type not yet supported"); -} - -llvm::Expected<std::unique_ptr<lld::File>> -normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path, - bool copyRefs) { - switch (normalizedFile.fileType) { - case MH_DYLIB: - case MH_DYLIB_STUB: - return dylibToAtoms(normalizedFile, path, copyRefs); - case MH_OBJECT: - return objectToAtoms(normalizedFile, path, copyRefs); - default: - llvm_unreachable("unhandled MachO file type!"); - } -} - -} // namespace normalized -} // namespace mach_o -} // namespace lld |
