diff options
Diffstat (limited to 'gnu/llvm/tools/lld/lib')
49 files changed, 19397 insertions, 0 deletions
diff --git a/gnu/llvm/tools/lld/lib/CMakeLists.txt b/gnu/llvm/tools/lld/lib/CMakeLists.txt new file mode 100644 index 00000000000..699f5e93f8a --- /dev/null +++ b/gnu/llvm/tools/lld/lib/CMakeLists.txt @@ -0,0 +1,4 @@ +add_subdirectory(Config) +add_subdirectory(Core) +add_subdirectory(Driver) +add_subdirectory(ReaderWriter) diff --git a/gnu/llvm/tools/lld/lib/Config/CMakeLists.txt b/gnu/llvm/tools/lld/lib/Config/CMakeLists.txt new file mode 100644 index 00000000000..e971b0b7aa6 --- /dev/null +++ b/gnu/llvm/tools/lld/lib/Config/CMakeLists.txt @@ -0,0 +1,9 @@ +add_lld_library(lldConfig + Version.cpp + + ADDITIONAL_HEADER_DIRS + ${LLD_INCLUDE_DIR}/lld/Config + + LINK_LIBS + LLVMSupport + ) diff --git a/gnu/llvm/tools/lld/lib/Config/Version.cpp b/gnu/llvm/tools/lld/lib/Config/Version.cpp new file mode 100644 index 00000000000..60687b9d894 --- /dev/null +++ b/gnu/llvm/tools/lld/lib/Config/Version.cpp @@ -0,0 +1,57 @@ +//===- lib/Config/Version.cpp - LLD Version Number ---------------*- C++-=====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines several version-related utility functions for LLD. 
+// +//===----------------------------------------------------------------------===// + +#include "lld/Config/Version.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace lld { + +StringRef getLLDRepositoryPath() { +#ifdef LLD_REPOSITORY_STRING + return LLD_REPOSITORY_STRING; +#else + return ""; +#endif +} + +StringRef getLLDRevision() { +#ifdef LLD_REVISION_STRING + return LLD_REVISION_STRING; +#else + return ""; +#endif +} + +std::string getLLDRepositoryVersion() { + std::string S = getLLDRepositoryPath(); + std::string T = getLLDRevision(); + if (S.empty() && T.empty()) + return ""; + if (!S.empty() && !T.empty()) + return "(" + S + " " + T + ")"; + if (!S.empty()) + return "(" + S + ")"; + return "(" + T + ")"; +} + +StringRef getLLDVersion() { +#ifdef LLD_VERSION_STRING + return LLD_VERSION_STRING; +#else + return ""; +#endif +} + +} // end namespace lld diff --git a/gnu/llvm/tools/lld/lib/Core/CMakeLists.txt b/gnu/llvm/tools/lld/lib/Core/CMakeLists.txt new file mode 100644 index 00000000000..41e0e7661b9 --- /dev/null +++ b/gnu/llvm/tools/lld/lib/Core/CMakeLists.txt @@ -0,0 +1,16 @@ +add_lld_library(lldCore + DefinedAtom.cpp + Error.cpp + File.cpp + LinkingContext.cpp + Reader.cpp + Resolver.cpp + SymbolTable.cpp + Writer.cpp + + ADDITIONAL_HEADER_DIRS + ${LLD_INCLUDE_DIR}/lld/Core + + LINK_LIBS + LLVMSupport + ) diff --git a/gnu/llvm/tools/lld/lib/Core/DefinedAtom.cpp b/gnu/llvm/tools/lld/lib/Core/DefinedAtom.cpp new file mode 100644 index 00000000000..8dc4d4a16f9 --- /dev/null +++ b/gnu/llvm/tools/lld/lib/Core/DefinedAtom.cpp @@ -0,0 +1,94 @@ +//===- DefinedAtom.cpp ------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ErrorHandling.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" + +namespace lld { + +DefinedAtom::ContentPermissions DefinedAtom::permissions() const { + // By default base permissions on content type. + return permissions(this->contentType()); +} + +// Utility function for deriving permissions from content type +DefinedAtom::ContentPermissions DefinedAtom::permissions(ContentType type) { + switch (type) { + case typeCode: + case typeResolver: + case typeBranchIsland: + case typeBranchShim: + case typeStub: + case typeStubHelper: + case typeMachHeader: + return permR_X; + + case typeConstant: + case typeCString: + case typeUTF16String: + case typeCFI: + case typeLSDA: + case typeLiteral4: + case typeLiteral8: + case typeLiteral16: + case typeDTraceDOF: + case typeCompactUnwindInfo: + case typeProcessedUnwindInfo: + case typeObjCImageInfo: + case typeObjCMethodList: + return permR__; + + case typeData: + case typeDataFast: + case typeZeroFill: + case typeZeroFillFast: + case typeObjC1Class: + case typeLazyPointer: + case typeLazyDylibPointer: + case typeNonLazyPointer: + case typeThunkTLV: + return permRW_; + + case typeGOT: + case typeConstData: + case typeCFString: + case typeInitializerPtr: + case typeTerminatorPtr: + case typeCStringPtr: + case typeObjCClassPtr: + case typeObjC2CategoryList: + case typeInterposingTuples: + case typeTLVInitialData: + case typeTLVInitialZeroFill: + case typeTLVInitializerPtr: + return permRW_L; + + case typeUnknown: + case typeTempLTO: + case typeSectCreate: + case typeDSOHandle: + return permUnknown; + } + llvm_unreachable("unknown content type"); +} + +bool DefinedAtom::compareByPosition(const DefinedAtom *lhs, + const DefinedAtom *rhs) { + if (lhs == rhs) + return false; + const File *lhsFile = &lhs->file(); + const File *rhsFile = &rhs->file(); + if (lhsFile->ordinal() != rhsFile->ordinal()) + return 
lhsFile->ordinal() < rhsFile->ordinal(); + assert(lhs->ordinal() != rhs->ordinal()); + return lhs->ordinal() < rhs->ordinal(); +} + +} // namespace diff --git a/gnu/llvm/tools/lld/lib/Core/Error.cpp b/gnu/llvm/tools/lld/lib/Core/Error.cpp new file mode 100644 index 00000000000..4df1ce120bd --- /dev/null +++ b/gnu/llvm/tools/lld/lib/Core/Error.cpp @@ -0,0 +1,91 @@ +//===- Error.cpp - system_error extensions for lld --------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/Error.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/ErrorHandling.h" +#include <mutex> +#include <string> +#include <vector> + +using namespace lld; + +class _YamlReaderErrorCategory : public std::error_category { +public: + const char* name() const LLVM_NOEXCEPT override { + return "lld.yaml.reader"; + } + + std::string message(int ev) const override { + switch (static_cast<YamlReaderError>(ev)) { + case YamlReaderError::unknown_keyword: + return "Unknown keyword found in yaml file"; + case YamlReaderError::illegal_value: + return "Bad value found in yaml file"; + } + llvm_unreachable("An enumerator of YamlReaderError does not have a " + "message defined."); + } +}; + +const std::error_category &lld::YamlReaderCategory() { + static _YamlReaderErrorCategory o; + return o; +} + +namespace lld { + +/// Temporary class to enable make_dynamic_error_code() until +/// llvm::ErrorOr<> is updated to work with error encapsulations +/// other than error_code. 
+class dynamic_error_category : public std::error_category { +public: + ~dynamic_error_category() override = default; + + const char *name() const LLVM_NOEXCEPT override { + return "lld.dynamic_error"; + } + + std::string message(int ev) const override { + assert(ev >= 0); + assert(ev < (int)_messages.size()); + // The value is an index into the string vector. + return _messages[ev]; + } + + int add(std::string msg) { + std::lock_guard<std::recursive_mutex> lock(_mutex); + // Value zero is always the successs value. + if (_messages.empty()) + _messages.push_back("Success"); + _messages.push_back(msg); + // Return the index of the string just appended. + return _messages.size() - 1; + } + +private: + std::vector<std::string> _messages; + std::recursive_mutex _mutex; +}; + +static dynamic_error_category categorySingleton; + +std::error_code make_dynamic_error_code(StringRef msg) { + return std::error_code(categorySingleton.add(msg), categorySingleton); +} + +char GenericError::ID = 0; + +GenericError::GenericError(Twine Msg) : Msg(Msg.str()) { } + +void GenericError::log(raw_ostream &OS) const { + OS << Msg; +} + +} // namespace lld diff --git a/gnu/llvm/tools/lld/lib/Core/File.cpp b/gnu/llvm/tools/lld/lib/Core/File.cpp new file mode 100644 index 00000000000..b84132bfecd --- /dev/null +++ b/gnu/llvm/tools/lld/lib/Core/File.cpp @@ -0,0 +1,30 @@ +//===- Core/File.cpp - A Container of Atoms -------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "lld/Core/File.h" +#include "lld/Core/LLVM.h" +#include <mutex> + +namespace lld { + +File::~File() { } + +File::AtomVector<DefinedAtom> File::_noDefinedAtoms; +File::AtomVector<UndefinedAtom> File::_noUndefinedAtoms; +File::AtomVector<SharedLibraryAtom> File::_noSharedLibraryAtoms; +File::AtomVector<AbsoluteAtom> File::_noAbsoluteAtoms; + +std::error_code File::parse() { + std::lock_guard<std::mutex> lock(_parseMutex); + if (!_lastError.hasValue()) + _lastError = doParse(); + return _lastError.getValue(); +} + +} // namespace lld diff --git a/gnu/llvm/tools/lld/lib/Core/LinkingContext.cpp b/gnu/llvm/tools/lld/lib/Core/LinkingContext.cpp new file mode 100644 index 00000000000..2732543d306 --- /dev/null +++ b/gnu/llvm/tools/lld/lib/Core/LinkingContext.cpp @@ -0,0 +1,69 @@ +//===- lib/Core/LinkingContext.cpp - Linker Context Object Interface ------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "lld/Core/LinkingContext.h" +#include "lld/Core/Resolver.h" +#include "lld/Core/Simple.h" +#include "lld/Core/Writer.h" +#include "llvm/ADT/Triple.h" + +namespace lld { + +LinkingContext::LinkingContext() {} + +LinkingContext::~LinkingContext() {} + +bool LinkingContext::validate(raw_ostream &diagnostics) { + return validateImpl(diagnostics); +} + +llvm::Error LinkingContext::writeFile(const File &linkedFile) const { + return this->writer().writeFile(linkedFile, _outputPath); +} + +std::unique_ptr<File> LinkingContext::createEntrySymbolFile() const { + return createEntrySymbolFile("<command line option -e>"); +} + +std::unique_ptr<File> +LinkingContext::createEntrySymbolFile(StringRef filename) const { + if (entrySymbolName().empty()) + return nullptr; + std::unique_ptr<SimpleFile> entryFile(new SimpleFile(filename, + File::kindEntryObject)); + entryFile->addAtom( + *(new (_allocator) SimpleUndefinedAtom(*entryFile, entrySymbolName()))); + return std::move(entryFile); +} + +std::unique_ptr<File> LinkingContext::createUndefinedSymbolFile() const { + return createUndefinedSymbolFile("<command line option -u or --defsym>"); +} + +std::unique_ptr<File> +LinkingContext::createUndefinedSymbolFile(StringRef filename) const { + if (_initialUndefinedSymbols.empty()) + return nullptr; + std::unique_ptr<SimpleFile> undefinedSymFile( + new SimpleFile(filename, File::kindUndefinedSymsObject)); + for (StringRef undefSym : _initialUndefinedSymbols) + undefinedSymFile->addAtom(*(new (_allocator) SimpleUndefinedAtom( + *undefinedSymFile, undefSym))); + return std::move(undefinedSymFile); +} + +void LinkingContext::createInternalFiles( + std::vector<std::unique_ptr<File> > &result) const { + if (std::unique_ptr<File> file = createEntrySymbolFile()) + result.push_back(std::move(file)); + if (std::unique_ptr<File> file = createUndefinedSymbolFile()) + result.push_back(std::move(file)); +} 
+ +} // end namespace lld diff --git a/gnu/llvm/tools/lld/lib/Core/Reader.cpp b/gnu/llvm/tools/lld/lib/Core/Reader.cpp new file mode 100644 index 00000000000..107db07891d --- /dev/null +++ b/gnu/llvm/tools/lld/lib/Core/Reader.cpp @@ -0,0 +1,110 @@ +//===- lib/Core/Reader.cpp ------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/File.h" +#include "lld/Core/Reader.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/FileUtilities.h" +#include "llvm/Support/MemoryBuffer.h" +#include <memory> +#include <system_error> + +namespace lld { + +YamlIOTaggedDocumentHandler::~YamlIOTaggedDocumentHandler() {} + +void Registry::add(std::unique_ptr<Reader> reader) { + _readers.push_back(std::move(reader)); +} + +void Registry::add(std::unique_ptr<YamlIOTaggedDocumentHandler> handler) { + _yamlHandlers.push_back(std::move(handler)); +} + +ErrorOr<std::unique_ptr<File>> +Registry::loadFile(std::unique_ptr<MemoryBuffer> mb) const { + // Get file magic. + StringRef content(mb->getBufferStart(), mb->getBufferSize()); + llvm::sys::fs::file_magic fileType = llvm::sys::fs::identify_magic(content); + + // Ask each registered reader if it can handle this file type or extension. + for (const std::unique_ptr<Reader> &reader : _readers) { + if (!reader->canParse(fileType, mb->getMemBufferRef())) + continue; + return reader->loadFile(std::move(mb), *this); + } + + // No Reader could parse this file. 
+ return make_error_code(llvm::errc::executable_format_error); +} + +static const Registry::KindStrings kindStrings[] = { + {Reference::kindLayoutAfter, "layout-after"}, + {Reference::kindAssociate, "associate"}, + LLD_KIND_STRING_END}; + +Registry::Registry() { + addKindTable(Reference::KindNamespace::all, Reference::KindArch::all, + kindStrings); +} + +bool Registry::handleTaggedDoc(llvm::yaml::IO &io, + const lld::File *&file) const { + for (const std::unique_ptr<YamlIOTaggedDocumentHandler> &h : _yamlHandlers) + if (h->handledDocTag(io, file)) + return true; + return false; +} + + +void Registry::addKindTable(Reference::KindNamespace ns, + Reference::KindArch arch, + const KindStrings array[]) { + KindEntry entry = { ns, arch, array }; + _kindEntries.push_back(entry); +} + +bool Registry::referenceKindFromString(StringRef inputStr, + Reference::KindNamespace &ns, + Reference::KindArch &arch, + Reference::KindValue &value) const { + for (const KindEntry &entry : _kindEntries) { + for (const KindStrings *pair = entry.array; !pair->name.empty(); ++pair) { + if (!inputStr.equals(pair->name)) + continue; + ns = entry.ns; + arch = entry.arch; + value = pair->value; + return true; + } + } + return false; +} + +bool Registry::referenceKindToString(Reference::KindNamespace ns, + Reference::KindArch arch, + Reference::KindValue value, + StringRef &str) const { + for (const KindEntry &entry : _kindEntries) { + if (entry.ns != ns) + continue; + if (entry.arch != arch) + continue; + for (const KindStrings *pair = entry.array; !pair->name.empty(); ++pair) { + if (pair->value != value) + continue; + str = pair->name; + return true; + } + } + return false; +} + +} // end namespace lld diff --git a/gnu/llvm/tools/lld/lib/Core/Resolver.cpp b/gnu/llvm/tools/lld/lib/Core/Resolver.cpp new file mode 100644 index 00000000000..ef694fd972f --- /dev/null +++ b/gnu/llvm/tools/lld/lib/Core/Resolver.cpp @@ -0,0 +1,505 @@ +//===- Core/Resolver.cpp - Resolves Atom References 
-----------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/Atom.h" +#include "lld/Core/ArchiveLibraryFile.h" +#include "lld/Core/File.h" +#include "lld/Core/Instrumentation.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/LinkingContext.h" +#include "lld/Core/Resolver.h" +#include "lld/Core/SharedLibraryFile.h" +#include "lld/Core/SymbolTable.h" +#include "lld/Core/UndefinedAtom.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <utility> +#include <vector> + +namespace lld { + +llvm::Expected<bool> Resolver::handleFile(File &file) { + if (auto ec = _ctx.handleLoadedFile(file)) + return std::move(ec); + bool undefAdded = false; + for (auto &atom : file.defined().owning_ptrs()) + doDefinedAtom(std::move(atom)); + for (auto &atom : file.undefined().owning_ptrs()) { + if (doUndefinedAtom(std::move(atom))) + undefAdded = true; + } + for (auto &atom : file.sharedLibrary().owning_ptrs()) + doSharedLibraryAtom(std::move(atom)); + for (auto &atom : file.absolute().owning_ptrs()) + doAbsoluteAtom(std::move(atom)); + return undefAdded; +} + +llvm::Expected<bool> Resolver::forEachUndefines(File &file, + UndefCallback callback) { + size_t i = _undefineIndex[&file]; + bool undefAdded = false; + do { + for (; i < _undefines.size(); ++i) { + StringRef undefName = _undefines[i]; + if (undefName.empty()) + continue; + const Atom *atom = _symbolTable.findByName(undefName); + if (!isa<UndefinedAtom>(atom) || _symbolTable.isCoalescedAway(atom)) { + // The symbol was resolved by some other file. Cache the result. 
+ _undefines[i] = ""; + continue; + } + auto undefAddedOrError = callback(undefName); + if (auto ec = undefAddedOrError.takeError()) + return std::move(ec); + undefAdded |= undefAddedOrError.get(); + } + } while (i < _undefines.size()); + _undefineIndex[&file] = i; + return undefAdded; +} + +llvm::Expected<bool> Resolver::handleArchiveFile(File &file) { + ArchiveLibraryFile *archiveFile = cast<ArchiveLibraryFile>(&file); + return forEachUndefines(file, + [&](StringRef undefName) -> llvm::Expected<bool> { + if (File *member = archiveFile->find(undefName)) { + member->setOrdinal(_ctx.getNextOrdinalAndIncrement()); + return handleFile(*member); + } + return false; + }); +} + +llvm::Error Resolver::handleSharedLibrary(File &file) { + // Add all the atoms from the shared library + SharedLibraryFile *sharedLibrary = cast<SharedLibraryFile>(&file); + auto undefAddedOrError = handleFile(*sharedLibrary); + if (auto ec = undefAddedOrError.takeError()) + return ec; + undefAddedOrError = + forEachUndefines(file, [&](StringRef undefName) -> llvm::Expected<bool> { + auto atom = sharedLibrary->exports(undefName); + if (atom.get()) + doSharedLibraryAtom(std::move(atom)); + return false; + }); + + if (auto ec = undefAddedOrError.takeError()) + return ec; + return llvm::Error(); +} + +bool Resolver::doUndefinedAtom(OwningAtomPtr<UndefinedAtom> atom) { + DEBUG_WITH_TYPE("resolver", llvm::dbgs() + << " UndefinedAtom: " + << llvm::format("0x%09lX", atom.get()) + << ", name=" << atom.get()->name() << "\n"); + + // tell symbol table + bool newUndefAdded = _symbolTable.add(*atom.get()); + if (newUndefAdded) + _undefines.push_back(atom.get()->name()); + + // add to list of known atoms + _atoms.push_back(OwningAtomPtr<Atom>(atom.release())); + + return newUndefAdded; +} + +// Called on each atom when a file is added. Returns true if a given +// atom is added to the symbol table. 
+void Resolver::doDefinedAtom(OwningAtomPtr<DefinedAtom> atom) {
+  DEBUG_WITH_TYPE("resolver", llvm::dbgs()
+                    << " DefinedAtom: "
+                    << llvm::format("0x%09lX", atom.get())
+                    << ", file=#"
+                    << atom.get()->file().ordinal()
+                    << ", atom=#"
+                    << atom.get()->ordinal()
+                    << ", name="
+                    << atom.get()->name()
+                    << ", type="
+                    << atom.get()->contentType()
+                    << "\n");
+
+  // With dead stripping enabled, an atom that must never be stripped
+  // becomes one of the roots of the liveness walk.
+  const bool isDeadStripRoot =
+      _ctx.deadStrip() &&
+      atom.get()->deadStrip() == DefinedAtom::deadStripNever;
+  if (isDeadStripRoot)
+    _deadStripRoots.insert(atom.get());
+
+  // Register with the symbol table first, then hand ownership to _atoms.
+  _symbolTable.add(*atom.get());
+  OwningAtomPtr<Atom> owned(atom.release());
+  _atoms.push_back(std::move(owned));
+}
+
+/// Registers a shared-library atom with the symbol table and takes
+/// ownership of it in _atoms.
+void Resolver::doSharedLibraryAtom(OwningAtomPtr<SharedLibraryAtom> atom) {
+  DEBUG_WITH_TYPE("resolver", llvm::dbgs()
+                    << " SharedLibraryAtom: "
+                    << llvm::format("0x%09lX", atom.get())
+                    << ", name="
+                    << atom.get()->name()
+                    << "\n");
+
+  // Register with the symbol table first, then hand ownership to _atoms.
+  _symbolTable.add(*atom.get());
+  OwningAtomPtr<Atom> owned(atom.release());
+  _atoms.push_back(std::move(owned));
+}
+
+/// Takes ownership of an absolute atom in _atoms. The atom is entered into
+/// the symbol table unless its scope is scopeTranslationUnit.
+void Resolver::doAbsoluteAtom(OwningAtomPtr<AbsoluteAtom> atom) {
+  DEBUG_WITH_TYPE("resolver", llvm::dbgs()
+                    << " AbsoluteAtom: "
+                    << llvm::format("0x%09lX", atom.get())
+                    << ", name="
+                    << atom.get()->name()
+                    << "\n");
+
+  const bool isFileLocal = atom.get()->scope() == Atom::scopeTranslationUnit;
+  if (!isFileLocal)
+    _symbolTable.add(*atom.get());
+
+  OwningAtomPtr<Atom> owned(atom.release());
+  _atoms.push_back(std::move(owned));
+}
+
+// Returns true if at least one of the file nodes at positions [begin, end)
+// of the input list is recorded as having added an undefined symbol.
+bool Resolver::undefinesAdded(int begin, int end) {
+  std::vector<std::unique_ptr<Node>> &nodes = _ctx.getNodes();
+  for (int pos = begin; pos < end; ++pos) {
+    // Only file nodes carry a file to look up; other node kinds are skipped.
+    FileNode *fileNode = dyn_cast<FileNode>(nodes[pos].get());
+    if (!fileNode)
+      continue;
+    if (_newUndefinesAdded[fileNode->getFile()])
+      return true;
+  }
+  return false;
+}
+
+/// Returns the file of the input node at \p index and advances \p index
+/// past it, or returns nullptr once \p index is beyond the last node.
+/// GroupEnd nodes are consumed here: \p index is moved back to the start of
+/// the group when undefinesAdded() is true for the group's files, and moved
+/// past the GroupEnd otherwise.
+File *Resolver::getFile(int &index) {
+  std::vector<std::unique_ptr<Node>> &nodes = _ctx.getNodes();
+  for (;;) {
+    if ((size_t)index >= nodes.size())
+      return nullptr;
+
+    GroupEnd *group = dyn_cast<GroupEnd>(nodes[index].get());
+    if (!group)
+      return cast<FileNode>(nodes[index++].get())->getFile();
+
+    // We are at the end of the current group. If one or more new
+    // undefined atom has been added in the last groupSize files, we
+    // reiterate over the files.
+    const int groupSize = group->getSize();
+    if (undefinesAdded(index - groupSize, index))
+      index -= groupSize;
+    else
+      ++index;
+  }
+}
+
+// Keep adding atoms from the files handed out by getFile() until it
+// returns no more files. This function is where undefined atoms are
+// resolved.
+bool Resolver::resolveUndefines() {
+  DEBUG_WITH_TYPE("resolver",
+                  llvm::dbgs() << "******** Resolving undefines:\n");
+  ScopedTask task(getDefaultDomain(), "resolveUndefines");
+  int index = 0;
+  // Non-library files that have already been processed once.
+  std::set<File *> seen;
+  for (;;) {
+    bool undefAdded = false;
+    DEBUG_WITH_TYPE("resolver",
+                    llvm::dbgs() << "Loading file #" << index << "\n");
+    // getFile() advances index; a null result means the inputs are exhausted.
+    File *file = getFile(index);
+    if (!file)
+      return true;
+    if (std::error_code ec = file->parse()) {
+      llvm::errs() << "Cannot open " + file->path()
+                   << ": " << ec.message() << "\n";
+      return false;
+    }
+    DEBUG_WITH_TYPE("resolver",
+                    llvm::dbgs() << "Loaded file: " << file->path() << "\n");
+    switch (file->kind()) {
+    case File::kindErrorObject:
+    case File::kindNormalizedObject:
+    case File::kindMachObject:
+    case File::kindCEntryObject:
+    case File::kindHeaderObject:
+    case File::kindEntryObject:
+    case File::kindUndefinedSymsObject:
+    case File::kindStubHelperObject:
+    case File::kindResolverMergedObject:
+    case File::kindSectCreateObject: {
+      // The same file may be visited more than once if the file is
+      // in --start-group and --end-group. Only library files should
+      // be processed more than once.
+      if (seen.count(file))
+        break;
+      seen.insert(file);
+      assert(!file->hasOrdinal());
+      file->setOrdinal(_ctx.getNextOrdinalAndIncrement());
+      auto undefAddedOrError = handleFile(*file);
+      if (auto EC = undefAddedOrError.takeError()) {
+        // FIXME: This should be passed to logAllUnhandledErrors but it needs
+        // to be passed a Twine instead of a string.
+        llvm::errs() << "Error in " + file->path() << ": ";
+        logAllUnhandledErrors(std::move(EC), llvm::errs(), std::string());
+        return false;
+      }
+      undefAdded = undefAddedOrError.get();
+      break;
+    }
+    case File::kindArchiveLibrary: {
+      // A library may be revisited, so it only gets an ordinal the first time.
+      if (!file->hasOrdinal())
+        file->setOrdinal(_ctx.getNextOrdinalAndIncrement());
+      auto undefAddedOrError = handleArchiveFile(*file);
+      if (auto EC = undefAddedOrError.takeError()) {
+        // FIXME: This should be passed to logAllUnhandledErrors but it needs
+        // to be passed a Twine instead of a string.
+        llvm::errs() << "Error in " + file->path() << ": ";
+        logAllUnhandledErrors(std::move(EC), llvm::errs(), std::string());
+        return false;
+      }
+      undefAdded = undefAddedOrError.get();
+      break;
+    }
+    case File::kindSharedLibrary:
+      // A library may be revisited, so it only gets an ordinal the first time.
+      if (!file->hasOrdinal())
+        file->setOrdinal(_ctx.getNextOrdinalAndIncrement());
+      if (auto EC = handleSharedLibrary(*file)) {
+        // FIXME: This should be passed to logAllUnhandledErrors but it needs
+        // to be passed a Twine instead of a string.
+        llvm::errs() << "Error in " + file->path() << ": ";
+        logAllUnhandledErrors(std::move(EC), llvm::errs(), std::string());
+        return false;
+      }
+      // undefAdded stays false for shared libraries.
+      break;
+    }
+    // Recorded per file; Resolver::undefinesAdded() reads this map.
+    _newUndefinesAdded[file] = undefAdded;
+  }
+}
+
+// Switch all references to undefined or coalesced-away atoms
+// to the new defined atom.
+void Resolver::updateReferences() {
+  DEBUG_WITH_TYPE("resolver",
+                  llvm::dbgs() << "******** Updating references:\n");
+  ScopedTask task(getDefaultDomain(), "updateReferences");
+  for (const OwningAtomPtr<Atom> &atom : _atoms) {
+    if (const DefinedAtom *defAtom = dyn_cast<DefinedAtom>(atom.get())) {
+      for (const Reference *ref : *defAtom) {
+        // A reference of type kindAssociate shouldn't be updated.
+        // Instead, an atom having such reference will be removed
+        // if the target atom is coalesced away, so that they will
+        // go away as a group.
+        if (ref->kindNamespace() == lld::Reference::KindNamespace::all &&
+            ref->kindValue() == lld::Reference::kindAssociate) {
+          if (_symbolTable.isCoalescedAway(atom.get()))
+            _deadAtoms.insert(ref->target());
+          continue;
+        }
+        // Every other reference is retargeted to the symbol table's
+        // replacement for its current target.
+        const Atom *newTarget = _symbolTable.replacement(ref->target());
+        const_cast<Reference *>(ref)->setTarget(newTarget);
+      }
+    }
+  }
+}
+
+// For dead code stripping, recursively mark atoms "live"
+void Resolver::markLive(const Atom *atom) {
+  // Mark the atom as live. If it's already marked live, then stop recursion.
+  auto exists = _liveAtoms.insert(atom);
+  if (!exists.second)
+    return;
+
+  // Mark all atoms it references as live
+  if (const DefinedAtom *defAtom = dyn_cast<DefinedAtom>(atom)) {
+    for (const Reference *ref : *defAtom)
+      markLive(ref->target());
+    // Also follow the reverse references recorded in _reverseRef for this
+    // atom (built by deadStripOptimize()).
+    for (auto &p : llvm::make_range(_reverseRef.equal_range(defAtom))) {
+      const Atom *target = p.second;
+      markLive(target);
+    }
+  }
+}
+
+// Returns true for a kindLayoutAfter reference in the "all" kind namespace.
+static bool isBackref(const Reference *ref) {
+  if (ref->kindNamespace() != lld::Reference::KindNamespace::all)
+    return false;
+  return (ref->kindValue() == lld::Reference::kindLayoutAfter);
+}
+
+// remove all atoms not actually used
+void Resolver::deadStripOptimize() {
+  DEBUG_WITH_TYPE("resolver",
+                  llvm::dbgs() << "******** Dead stripping unused atoms:\n");
+  ScopedTask task(getDefaultDomain(), "deadStripOptimize");
+  // only do this optimization with -dead_strip
+  if (!_ctx.deadStrip())
+    return;
+
+  // Some types of references prevent referring atoms from being
+  // dead-stripped. Make a reverse map of such references before traversing
+  // the graph. While traversing the list of atoms, mark AbsoluteAtoms as
+  // live in order to avoid reclaim.
+  for (const OwningAtomPtr<Atom> &atom : _atoms) {
+    if (const DefinedAtom *defAtom = dyn_cast<DefinedAtom>(atom.get()))
+      for (const Reference *ref : *defAtom)
+        if (isBackref(ref))
+          _reverseRef.insert(std::make_pair(ref->target(), atom.get()));
+    if (const AbsoluteAtom *absAtom = dyn_cast<AbsoluteAtom>(atom.get()))
+      markLive(absAtom);
+  }
+
+  // By default, shared libraries are built with all globals as dead strip roots
+  if (_ctx.globalsAreDeadStripRoots())
+    for (const OwningAtomPtr<Atom> &atom : _atoms)
+      if (const DefinedAtom *defAtom = dyn_cast<DefinedAtom>(atom.get()))
+        if (defAtom->scope() == DefinedAtom::scopeGlobal)
+          _deadStripRoots.insert(defAtom);
+
+  // Or, use list of names that are dead strip roots.
+  // NOTE(review): the assert is the only null check here; in an NDEBUG
+  // build a name missing from the symbol table would insert nullptr as a
+  // root. Confirm that root names are always present in the table.
+  for (const StringRef &name : _ctx.deadStripRoots()) {
+    const Atom *symAtom = _symbolTable.findByName(name);
+    assert(symAtom);
+    _deadStripRoots.insert(symAtom);
+  }
+
+  // mark all roots as live, and recursively all atoms they reference
+  for (const Atom *dsrAtom : _deadStripRoots)
+    markLive(dsrAtom);
+
+  // now remove all non-live atoms from _atoms
+  _atoms.erase(std::remove_if(_atoms.begin(), _atoms.end(),
+                              [&](OwningAtomPtr<Atom> &a) {
+                                return _liveAtoms.count(a.get()) == 0;
+                              }),
+               _atoms.end());
+}
+
+// Error out if some undefines remain. Returns true when at least one
+// undefined symbol survives all of the filters below.
+bool Resolver::checkUndefines() {
+  DEBUG_WITH_TYPE("resolver",
+                  llvm::dbgs() << "******** Checking for undefines:\n");
+
+  // build vector of remaining undefined symbols
+  std::vector<const UndefinedAtom *> undefinedAtoms = _symbolTable.undefines();
+  if (_ctx.deadStrip()) {
+    // When dead code stripping, we don't care if dead atoms are undefined.
+    undefinedAtoms.erase(
+        std::remove_if(undefinedAtoms.begin(), undefinedAtoms.end(),
+                       [&](const Atom *a) { return _liveAtoms.count(a) == 0; }),
+        undefinedAtoms.end());
+  }
+
+  if (undefinedAtoms.empty())
+    return false;
+
+  // Warn about unresolved symbols.
+  bool foundUndefines = false;
+  for (const UndefinedAtom *undef : undefinedAtoms) {
+    // Skip over a weak symbol.
+    if (undef->canBeNull() != UndefinedAtom::canBeNullNever)
+      continue;
+
+    // If this is a library and undefined symbols are allowed on the
+    // target platform, skip over it.
+    if (isa<SharedLibraryFile>(undef->file()) && _ctx.allowShlibUndefines())
+      continue;
+
+    // If the undefine is coalesced away, skip over it.
+    if (_symbolTable.isCoalescedAway(undef))
+      continue;
+
+    // This symbol is genuinely undefined. Report it if the context asks
+    // for remaining undefines to be printed.
+    foundUndefines = true;
+    if (_ctx.printRemainingUndefines()) {
+      llvm::errs() << "Undefined symbol: " << undef->file().path()
+                   << ": " << _ctx.demangle(undef->name())
+                   << "\n";
+    }
+  }
+  if (!foundUndefines)
+    return false;
+  if (_ctx.printRemainingUndefines())
+    llvm::errs() << "symbol(s) not found\n";
+  return true;
+}
+
+// Remove from _atoms all coalesced-away atoms, together with the atoms
+// collected in _deadAtoms.
+void Resolver::removeCoalescedAwayAtoms() {
+  DEBUG_WITH_TYPE("resolver",
+                  llvm::dbgs() << "******** Removing coalesced away atoms:\n");
+  ScopedTask task(getDefaultDomain(), "removeCoalescedAwayAtoms");
+  _atoms.erase(std::remove_if(_atoms.begin(), _atoms.end(),
+                              [&](OwningAtomPtr<Atom> &a) {
+                                return _symbolTable.isCoalescedAway(a.get()) ||
+                                       _deadAtoms.count(a.get());
+                              }),
+               _atoms.end());
+}
+
+// Runs the resolver passes in order: resolve undefines, update references,
+// dead-strip, check for remaining undefines, drop coalesced-away atoms, and
+// finally move the surviving atoms into _result. Returns false when
+// resolving fails or when disallowed undefines remain.
+bool Resolver::resolve() {
+  DEBUG_WITH_TYPE("resolver",
+                  llvm::dbgs() << "******** Resolving atom references:\n");
+  if (!resolveUndefines())
+    return false;
+  updateReferences();
+  deadStripOptimize();
+  if (checkUndefines()) {
+    DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "Found undefines... ");
+    if (!_ctx.allowRemainingUndefines()) {
+      DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "which we don't allow\n");
+      return false;
+    }
+    DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "which we are ok with\n");
+  }
+  removeCoalescedAwayAtoms();
+  _result->addAtoms(_atoms);
+  DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "******** Finished resolver\n");
+  return true;
+}
+
+// Adds every atom in `all` to this file. Each owning pointer is released as
+// its atom is handed to addAtom().
+void Resolver::MergedFile::addAtoms(
+    llvm::MutableArrayRef<OwningAtomPtr<Atom>> all) {
+  ScopedTask task(getDefaultDomain(), "addAtoms");
+  DEBUG_WITH_TYPE("resolver", llvm::dbgs() << "Resolver final atom list:\n");
+
+  for (OwningAtomPtr<Atom> &atom : all) {
+#ifndef NDEBUG
+    // Debug-only dump; defined atoms also print file/atom ordinals and type.
+    if (auto *definedAtom = dyn_cast<DefinedAtom>(atom.get())) {
+      DEBUG_WITH_TYPE("resolver", llvm::dbgs()
+                      << llvm::format(" 0x%09lX", definedAtom)
+                      << ", file=#"
+                      << definedAtom->file().ordinal()
+                      << ", atom=#"
+                      << definedAtom->ordinal()
+                      << ", name="
+                      << definedAtom->name()
+                      << ", type="
+                      << definedAtom->contentType()
+                      << "\n");
+    } else {
+      DEBUG_WITH_TYPE("resolver", llvm::dbgs()
+                      << llvm::format(" 0x%09lX", atom.get())
+                      << ", name="
+                      << atom.get()->name()
+                      << "\n");
+    }
+#endif
+    addAtom(*atom.release());
+  }
+}
+
+} // namespace lld
diff --git a/gnu/llvm/tools/lld/lib/Core/SymbolTable.cpp b/gnu/llvm/tools/lld/lib/Core/SymbolTable.cpp
new file mode 100644
index 00000000000..44631a5d40d
--- /dev/null
+++ b/gnu/llvm/tools/lld/lib/Core/SymbolTable.cpp
@@ -0,0 +1,319 @@
+//===- Core/SymbolTable.cpp - Main Symbol Table ---------------------------===//
+//
+// The LLVM Linker
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#include "lld/Core/SymbolTable.h" +#include "lld/Core/AbsoluteAtom.h" +#include "lld/Core/Atom.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/LinkingContext.h" +#include "lld/Core/Resolver.h" +#include "lld/Core/SharedLibraryAtom.h" +#include "lld/Core/UndefinedAtom.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <cstdlib> +#include <vector> + +namespace lld { +bool SymbolTable::add(const UndefinedAtom &atom) { return addByName(atom); } + +bool SymbolTable::add(const SharedLibraryAtom &atom) { return addByName(atom); } + +bool SymbolTable::add(const AbsoluteAtom &atom) { return addByName(atom); } + +bool SymbolTable::add(const DefinedAtom &atom) { + if (!atom.name().empty() && + atom.scope() != DefinedAtom::scopeTranslationUnit) { + // Named atoms cannot be merged by content. + assert(atom.merge() != DefinedAtom::mergeByContent); + // Track named atoms that are not scoped to file (static). + return addByName(atom); + } + if (atom.merge() == DefinedAtom::mergeByContent) { + // Named atoms cannot be merged by content. + assert(atom.name().empty()); + // Currently only read-only constants can be merged. + if (atom.permissions() == DefinedAtom::permR__) + return addByContent(atom); + // TODO: support mergeByContent of data atoms by comparing content & fixups. 
+ } + return false; +} + +enum NameCollisionResolution { + NCR_First, + NCR_Second, + NCR_DupDef, + NCR_DupUndef, + NCR_DupShLib, + NCR_Error +}; + +static NameCollisionResolution cases[4][4] = { + //regular absolute undef sharedLib + { + // first is regular + NCR_DupDef, NCR_Error, NCR_First, NCR_First + }, + { + // first is absolute + NCR_Error, NCR_Error, NCR_First, NCR_First + }, + { + // first is undef + NCR_Second, NCR_Second, NCR_DupUndef, NCR_Second + }, + { + // first is sharedLib + NCR_Second, NCR_Second, NCR_First, NCR_DupShLib + } +}; + +static NameCollisionResolution collide(Atom::Definition first, + Atom::Definition second) { + return cases[first][second]; +} + +enum MergeResolution { + MCR_First, + MCR_Second, + MCR_Largest, + MCR_SameSize, + MCR_Error +}; + +static MergeResolution mergeCases[][6] = { + // no tentative weak weakAddress sameNameAndSize largest + {MCR_Error, MCR_First, MCR_First, MCR_First, MCR_SameSize, MCR_Largest}, // no + {MCR_Second, MCR_Largest, MCR_Second, MCR_Second, MCR_SameSize, MCR_Largest}, // tentative + {MCR_Second, MCR_First, MCR_First, MCR_Second, MCR_SameSize, MCR_Largest}, // weak + {MCR_Second, MCR_First, MCR_First, MCR_First, MCR_SameSize, MCR_Largest}, // weakAddress + {MCR_SameSize, MCR_SameSize, MCR_SameSize, MCR_SameSize, MCR_SameSize, MCR_SameSize}, // sameSize + {MCR_Largest, MCR_Largest, MCR_Largest, MCR_Largest, MCR_SameSize, MCR_Largest}, // largest +}; + +static MergeResolution mergeSelect(DefinedAtom::Merge first, + DefinedAtom::Merge second) { + assert(first != DefinedAtom::mergeByContent); + assert(second != DefinedAtom::mergeByContent); + return mergeCases[first][second]; +} + +bool SymbolTable::addByName(const Atom &newAtom) { + StringRef name = newAtom.name(); + assert(!name.empty()); + const Atom *existing = findByName(name); + if (existing == nullptr) { + // Name is not in symbol table yet, add it associate with this atom. 
+ _nameTable[name] = &newAtom; + return true; + } + + // Do nothing if the same object is added more than once. + if (existing == &newAtom) + return false; + + // Name is already in symbol table and associated with another atom. + bool useNew = true; + switch (collide(existing->definition(), newAtom.definition())) { + case NCR_First: + useNew = false; + break; + case NCR_Second: + useNew = true; + break; + case NCR_DupDef: { + const auto *existingDef = cast<DefinedAtom>(existing); + const auto *newDef = cast<DefinedAtom>(&newAtom); + switch (mergeSelect(existingDef->merge(), newDef->merge())) { + case MCR_First: + useNew = false; + break; + case MCR_Second: + useNew = true; + break; + case MCR_Largest: { + uint64_t existingSize = existingDef->sectionSize(); + uint64_t newSize = newDef->sectionSize(); + useNew = (newSize >= existingSize); + break; + } + case MCR_SameSize: { + uint64_t existingSize = existingDef->sectionSize(); + uint64_t newSize = newDef->sectionSize(); + if (existingSize == newSize) { + useNew = true; + break; + } + llvm::errs() << "Size mismatch: " + << existing->name() << " (" << existingSize << ") " + << newAtom.name() << " (" << newSize << ")\n"; + // fallthrough + } + case MCR_Error: + llvm::errs() << "Duplicate symbols: " + << existing->name() + << ":" + << existing->file().path() + << " and " + << newAtom.name() + << ":" + << newAtom.file().path() + << "\n"; + llvm::report_fatal_error("duplicate symbol error"); + break; + } + break; + } + case NCR_DupUndef: { + const UndefinedAtom* existingUndef = cast<UndefinedAtom>(existing); + const UndefinedAtom* newUndef = cast<UndefinedAtom>(&newAtom); + + bool sameCanBeNull = (existingUndef->canBeNull() == newUndef->canBeNull()); + if (sameCanBeNull) + useNew = false; + else + useNew = (newUndef->canBeNull() < existingUndef->canBeNull()); + break; + } + case NCR_DupShLib: { + useNew = false; + break; + } + case NCR_Error: + llvm::errs() << "SymbolTable: error while merging " << name << "\n"; + 
llvm::report_fatal_error("duplicate symbol error"); + break; + } + + if (useNew) { + // Update name table to use new atom. + _nameTable[name] = &newAtom; + // Add existing atom to replacement table. + _replacedAtoms[existing] = &newAtom; + } else { + // New atom is not being used. Add it to replacement table. + _replacedAtoms[&newAtom] = existing; + } + return false; +} + +unsigned SymbolTable::AtomMappingInfo::getHashValue(const DefinedAtom *atom) { + auto content = atom->rawContent(); + return llvm::hash_combine(atom->size(), + atom->contentType(), + llvm::hash_combine_range(content.begin(), + content.end())); +} + +bool SymbolTable::AtomMappingInfo::isEqual(const DefinedAtom * const l, + const DefinedAtom * const r) { + if (l == r) + return true; + if (l == getEmptyKey()) + return false; + if (r == getEmptyKey()) + return false; + if (l == getTombstoneKey()) + return false; + if (r == getTombstoneKey()) + return false; + if (l->contentType() != r->contentType()) + return false; + if (l->size() != r->size()) + return false; + if (l->sectionChoice() != r->sectionChoice()) + return false; + if (l->sectionChoice() == DefinedAtom::sectionCustomRequired) { + if (!l->customSectionName().equals(r->customSectionName())) + return false; + } + ArrayRef<uint8_t> lc = l->rawContent(); + ArrayRef<uint8_t> rc = r->rawContent(); + return memcmp(lc.data(), rc.data(), lc.size()) == 0; +} + +bool SymbolTable::addByContent(const DefinedAtom &newAtom) { + AtomContentSet::iterator pos = _contentTable.find(&newAtom); + if (pos == _contentTable.end()) { + _contentTable.insert(&newAtom); + return true; + } + const Atom* existing = *pos; + // New atom is not being used. Add it to replacement table. 
+ _replacedAtoms[&newAtom] = existing; + return false; +} + +const Atom *SymbolTable::findByName(StringRef sym) { + NameToAtom::iterator pos = _nameTable.find(sym); + if (pos == _nameTable.end()) + return nullptr; + return pos->second; +} + +bool SymbolTable::isDefined(StringRef sym) { + if (const Atom *atom = findByName(sym)) + return !isa<UndefinedAtom>(atom); + return false; +} + +void SymbolTable::addReplacement(const Atom *replaced, + const Atom *replacement) { + _replacedAtoms[replaced] = replacement; +} + +const Atom *SymbolTable::replacement(const Atom *atom) { + // Find the replacement for a given atom. Atoms in _replacedAtoms + // may be chained, so find the last one. + for (;;) { + AtomToAtom::iterator pos = _replacedAtoms.find(atom); + if (pos == _replacedAtoms.end()) + return atom; + atom = pos->second; + } +} + +bool SymbolTable::isCoalescedAway(const Atom *atom) { + return _replacedAtoms.count(atom) > 0; +} + +std::vector<const UndefinedAtom *> SymbolTable::undefines() { + std::vector<const UndefinedAtom *> ret; + for (auto it : _nameTable) { + const Atom *atom = it.second; + assert(atom != nullptr); + if (const auto *undef = dyn_cast<const UndefinedAtom>(atom)) + if (_replacedAtoms.count(undef) == 0) + ret.push_back(undef); + } + return ret; +} + +std::vector<StringRef> SymbolTable::tentativeDefinitions() { + std::vector<StringRef> ret; + for (auto entry : _nameTable) { + const Atom *atom = entry.second; + StringRef name = entry.first; + assert(atom != nullptr); + if (const DefinedAtom *defAtom = dyn_cast<DefinedAtom>(atom)) + if (defAtom->merge() == DefinedAtom::mergeAsTentative) + ret.push_back(name); + } + return ret; +} + +} // namespace lld diff --git a/gnu/llvm/tools/lld/lib/Core/Writer.cpp b/gnu/llvm/tools/lld/lib/Core/Writer.cpp new file mode 100644 index 00000000000..93e6438a28f --- /dev/null +++ b/gnu/llvm/tools/lld/lib/Core/Writer.cpp @@ -0,0 +1,19 @@ +//===- lib/Core/Writer.cpp ------------------------------------------------===// +// 
+// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/File.h" +#include "lld/Core/Writer.h" + +namespace lld { +Writer::Writer() { +} + +Writer::~Writer() { +} +} // end namespace lld diff --git a/gnu/llvm/tools/lld/lib/Driver/CMakeLists.txt b/gnu/llvm/tools/lld/lib/Driver/CMakeLists.txt new file mode 100644 index 00000000000..1bd1f212581 --- /dev/null +++ b/gnu/llvm/tools/lld/lib/Driver/CMakeLists.txt @@ -0,0 +1,22 @@ +set(LLVM_TARGET_DEFINITIONS DarwinLdOptions.td) +tablegen(LLVM DarwinLdOptions.inc -gen-opt-parser-defs) +add_public_tablegen_target(DriverOptionsTableGen) + +add_lld_library(lldDriver + DarwinLdDriver.cpp + + ADDITIONAL_HEADER_DIRS + ${LLD_INCLUDE_DIR}/lld/Driver + + LINK_LIBS + lldConfig + lldMachO + lldCore + lldReaderWriter + lldYAML + LLVMObject + LLVMOption + LLVMSupport + ) + +add_dependencies(lldDriver DriverOptionsTableGen) diff --git a/gnu/llvm/tools/lld/lib/Driver/DarwinLdDriver.cpp b/gnu/llvm/tools/lld/lib/Driver/DarwinLdDriver.cpp new file mode 100644 index 00000000000..496b651bab4 --- /dev/null +++ b/gnu/llvm/tools/lld/lib/Driver/DarwinLdDriver.cpp @@ -0,0 +1,1215 @@ +//===- lib/Driver/DarwinLdDriver.cpp --------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// +/// Concrete instance of the Driver for darwin's ld. 
+/// +//===----------------------------------------------------------------------===// + +#include "lld/Core/ArchiveLibraryFile.h" +#include "lld/Core/File.h" +#include "lld/Core/Instrumentation.h" +#include "lld/Core/PassManager.h" +#include "lld/Core/Resolver.h" +#include "lld/Core/SharedLibraryFile.h" +#include "lld/Driver/Driver.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" + +using namespace lld; + +namespace { + +// Create enum with OPT_xxx values for each option in DarwinLdOptions.td +enum { + OPT_INVALID = 0, +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELP, META) \ + OPT_##ID, +#include "DarwinLdOptions.inc" +#undef OPTION +}; + +// Create prefix string literals used in DarwinLdOptions.td +#define PREFIX(NAME, VALUE) const char *const NAME[] = VALUE; +#include "DarwinLdOptions.inc" +#undef PREFIX + +// Create table mapping all options defined in DarwinLdOptions.td +static const llvm::opt::OptTable::Info infoTable[] = { +#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ + HELPTEXT, METAVAR) \ + { PREFIX, NAME, HELPTEXT, METAVAR, OPT_##ID, llvm::opt::Option::KIND##Class, \ + PARAM, FLAGS, OPT_##GROUP, OPT_##ALIAS, ALIASARGS }, +#include "DarwinLdOptions.inc" +#undef OPTION +}; + +// Create OptTable class for parsing actual command line arguments +class DarwinLdOptTable : public llvm::opt::OptTable { +public: + DarwinLdOptTable() : OptTable(infoTable) {} +}; + +static std::vector<std::unique_ptr<File>> +makeErrorFile(StringRef path, std::error_code ec) { + std::vector<std::unique_ptr<File>> result; + 
result.push_back(llvm::make_unique<ErrorFile>(path, ec)); + return result; +} + +static std::vector<std::unique_ptr<File>> +parseMemberFiles(std::unique_ptr<File> file) { + std::vector<std::unique_ptr<File>> members; + if (auto *archive = dyn_cast<ArchiveLibraryFile>(file.get())) { + if (std::error_code ec = archive->parseAllMembers(members)) + return makeErrorFile(file->path(), ec); + } else { + members.push_back(std::move(file)); + } + return members; +} + +std::vector<std::unique_ptr<File>> +loadFile(MachOLinkingContext &ctx, StringRef path, + raw_ostream &diag, bool wholeArchive, bool upwardDylib) { + if (ctx.logInputFiles()) + diag << path << "\n"; + + ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = ctx.getMemoryBuffer(path); + if (std::error_code ec = mbOrErr.getError()) + return makeErrorFile(path, ec); + ErrorOr<std::unique_ptr<File>> fileOrErr = + ctx.registry().loadFile(std::move(mbOrErr.get())); + if (std::error_code ec = fileOrErr.getError()) + return makeErrorFile(path, ec); + std::unique_ptr<File> &file = fileOrErr.get(); + + // If file is a dylib, inform LinkingContext about it. + if (SharedLibraryFile *shl = dyn_cast<SharedLibraryFile>(file.get())) { + if (std::error_code ec = shl->parse()) + return makeErrorFile(path, ec); + ctx.registerDylib(reinterpret_cast<mach_o::MachODylibFile *>(shl), + upwardDylib); + } + if (wholeArchive) + return parseMemberFiles(std::move(file)); + std::vector<std::unique_ptr<File>> files; + files.push_back(std::move(file)); + return files; +} + +} // anonymous namespace + +// Test may be running on Windows. Canonicalize the path +// separator to '/' to get consistent outputs for tests. 
+static std::string canonicalizePath(StringRef path) { + char sep = llvm::sys::path::get_separator().front(); + if (sep != '/') { + std::string fixedPath = path; + std::replace(fixedPath.begin(), fixedPath.end(), sep, '/'); + return fixedPath; + } else { + return path; + } +} + +static void addFile(StringRef path, MachOLinkingContext &ctx, + bool loadWholeArchive, + bool upwardDylib, raw_ostream &diag) { + std::vector<std::unique_ptr<File>> files = + loadFile(ctx, path, diag, loadWholeArchive, upwardDylib); + for (std::unique_ptr<File> &file : files) + ctx.getNodes().push_back(llvm::make_unique<FileNode>(std::move(file))); +} + +// Export lists are one symbol per line. Blank lines are ignored. +// Trailing comments start with #. +static std::error_code parseExportsList(StringRef exportFilePath, + MachOLinkingContext &ctx, + raw_ostream &diagnostics) { + // Map in export list file. + ErrorOr<std::unique_ptr<MemoryBuffer>> mb = + MemoryBuffer::getFileOrSTDIN(exportFilePath); + if (std::error_code ec = mb.getError()) + return ec; + ctx.addInputFileDependency(exportFilePath); + StringRef buffer = mb->get()->getBuffer(); + while (!buffer.empty()) { + // Split off each line in the file. + std::pair<StringRef, StringRef> lineAndRest = buffer.split('\n'); + StringRef line = lineAndRest.first; + // Ignore trailing # comments. + std::pair<StringRef, StringRef> symAndComment = line.split('#'); + StringRef sym = symAndComment.first.trim(); + if (!sym.empty()) + ctx.addExportSymbol(sym); + buffer = lineAndRest.second; + } + return std::error_code(); +} + + + +/// Order files are one symbol per line. Blank lines are ignored. +/// Trailing comments start with #. Symbol names can be prefixed with an +/// architecture name and/or .o leaf name. Examples: +/// _foo +/// bar.o:_bar +/// libfrob.a(bar.o):_bar +/// x86_64:_foo64 +static std::error_code parseOrderFile(StringRef orderFilePath, + MachOLinkingContext &ctx, + raw_ostream &diagnostics) { + // Map in order file. 
+ ErrorOr<std::unique_ptr<MemoryBuffer>> mb = + MemoryBuffer::getFileOrSTDIN(orderFilePath); + if (std::error_code ec = mb.getError()) + return ec; + ctx.addInputFileDependency(orderFilePath); + StringRef buffer = mb->get()->getBuffer(); + while (!buffer.empty()) { + // Split off each line in the file. + std::pair<StringRef, StringRef> lineAndRest = buffer.split('\n'); + StringRef line = lineAndRest.first; + buffer = lineAndRest.second; + // Ignore trailing # comments. + std::pair<StringRef, StringRef> symAndComment = line.split('#'); + if (symAndComment.first.empty()) + continue; + StringRef sym = symAndComment.first.trim(); + if (sym.empty()) + continue; + // Check for prefix. + StringRef prefix; + std::pair<StringRef, StringRef> prefixAndSym = sym.split(':'); + if (!prefixAndSym.second.empty()) { + sym = prefixAndSym.second; + prefix = prefixAndSym.first; + if (!prefix.endswith(".o") && !prefix.endswith(".o)")) { + // If arch name prefix does not match arch being linked, ignore symbol. + if (!ctx.archName().equals(prefix)) + continue; + prefix = ""; + } + } else + sym = prefixAndSym.first; + if (!sym.empty()) { + ctx.appendOrderedSymbol(sym, prefix); + //llvm::errs() << sym << ", prefix=" << prefix << "\n"; + } + } + return std::error_code(); +} + +// +// There are two variants of the -filelist option: +// +// -filelist <path> +// In this variant, the path is to a text file which contains one file path +// per line. There are no comments or trimming of whitespace. +// +// -fileList <path>,<dir> +// In this variant, the path is to a text file which contains a partial path +// per line. The <dir> prefix is prepended to each partial path. +// +static llvm::Error loadFileList(StringRef fileListPath, + MachOLinkingContext &ctx, bool forceLoad, + raw_ostream &diagnostics) { + // If there is a comma, split off <dir>. 
+ std::pair<StringRef, StringRef> opt = fileListPath.split(','); + StringRef filePath = opt.first; + StringRef dirName = opt.second; + ctx.addInputFileDependency(filePath); + // Map in file list file. + ErrorOr<std::unique_ptr<MemoryBuffer>> mb = + MemoryBuffer::getFileOrSTDIN(filePath); + if (std::error_code ec = mb.getError()) + return llvm::errorCodeToError(ec); + StringRef buffer = mb->get()->getBuffer(); + while (!buffer.empty()) { + // Split off each line in the file. + std::pair<StringRef, StringRef> lineAndRest = buffer.split('\n'); + StringRef line = lineAndRest.first; + StringRef path; + if (!dirName.empty()) { + // If there is a <dir> then prepend dir to each line. + SmallString<256> fullPath; + fullPath.assign(dirName); + llvm::sys::path::append(fullPath, Twine(line)); + path = ctx.copy(fullPath.str()); + } else { + // No <dir> use whole line as input file path. + path = ctx.copy(line); + } + if (!ctx.pathExists(path)) { + return llvm::make_error<GenericError>(Twine("File not found '") + + path + + "'"); + } + if (ctx.testingFileUsage()) { + diagnostics << "Found filelist entry " << canonicalizePath(path) << '\n'; + } + addFile(path, ctx, forceLoad, false, diagnostics); + buffer = lineAndRest.second; + } + return llvm::Error(); +} + +/// Parse number assuming it is base 16, but allow 0x prefix. 
+static bool parseNumberBase16(StringRef numStr, uint64_t &baseAddress) { + if (numStr.startswith_lower("0x")) + numStr = numStr.drop_front(2); + return numStr.getAsInteger(16, baseAddress); +} + +static void parseLLVMOptions(const LinkingContext &ctx) { + // Honor -mllvm + if (!ctx.llvmOptions().empty()) { + unsigned numArgs = ctx.llvmOptions().size(); + auto **args = new const char *[numArgs + 2]; + args[0] = "lld (LLVM option parsing)"; + for (unsigned i = 0; i != numArgs; ++i) + args[i + 1] = ctx.llvmOptions()[i]; + args[numArgs + 1] = nullptr; + llvm::cl::ParseCommandLineOptions(numArgs + 1, args); + } +} + +namespace lld { +namespace mach_o { + +bool parse(llvm::ArrayRef<const char *> args, MachOLinkingContext &ctx, + raw_ostream &diagnostics) { + // Parse command line options using DarwinLdOptions.td + DarwinLdOptTable table; + unsigned missingIndex; + unsigned missingCount; + llvm::opt::InputArgList parsedArgs = + table.ParseArgs(args.slice(1), missingIndex, missingCount); + if (missingCount) { + diagnostics << "error: missing arg value for '" + << parsedArgs.getArgString(missingIndex) << "' expected " + << missingCount << " argument(s).\n"; + return false; + } + + for (auto unknownArg : parsedArgs.filtered(OPT_UNKNOWN)) { + diagnostics << "warning: ignoring unknown argument: " + << unknownArg->getAsString(parsedArgs) << "\n"; + } + + // Figure out output kind ( -dylib, -r, -bundle, -preload, or -static ) + llvm::MachO::HeaderFileType fileType = llvm::MachO::MH_EXECUTE; + bool isStaticExecutable = false; + if (llvm::opt::Arg *kind = parsedArgs.getLastArg( + OPT_dylib, OPT_relocatable, OPT_bundle, OPT_static, OPT_preload)) { + switch (kind->getOption().getID()) { + case OPT_dylib: + fileType = llvm::MachO::MH_DYLIB; + break; + case OPT_relocatable: + fileType = llvm::MachO::MH_OBJECT; + break; + case OPT_bundle: + fileType = llvm::MachO::MH_BUNDLE; + break; + case OPT_static: + fileType = llvm::MachO::MH_EXECUTE; + isStaticExecutable = true; + break; + case 
OPT_preload: + fileType = llvm::MachO::MH_PRELOAD; + break; + } + } + + // Handle -arch xxx + MachOLinkingContext::Arch arch = MachOLinkingContext::arch_unknown; + if (llvm::opt::Arg *archStr = parsedArgs.getLastArg(OPT_arch)) { + arch = MachOLinkingContext::archFromName(archStr->getValue()); + if (arch == MachOLinkingContext::arch_unknown) { + diagnostics << "error: unknown arch named '" << archStr->getValue() + << "'\n"; + return false; + } + } + // If no -arch specified, scan input files to find first non-fat .o file. + if (arch == MachOLinkingContext::arch_unknown) { + for (auto &inFile : parsedArgs.filtered(OPT_INPUT)) { + // This is expensive because it opens and maps the file. But that is + // ok because no -arch is rare. + if (MachOLinkingContext::isThinObjectFile(inFile->getValue(), arch)) + break; + } + if (arch == MachOLinkingContext::arch_unknown && + !parsedArgs.getLastArg(OPT_test_file_usage)) { + // If no -arch and no options at all, print usage message. + if (parsedArgs.size() == 0) + table.PrintHelp(llvm::outs(), args[0], "LLVM Linker", false); + else + diagnostics << "error: -arch not specified and could not be inferred\n"; + return false; + } + } + + // Handle -macosx_version_min or -ios_version_min + MachOLinkingContext::OS os = MachOLinkingContext::OS::unknown; + uint32_t minOSVersion = 0; + if (llvm::opt::Arg *minOS = + parsedArgs.getLastArg(OPT_macosx_version_min, OPT_ios_version_min, + OPT_ios_simulator_version_min)) { + switch (minOS->getOption().getID()) { + case OPT_macosx_version_min: + os = MachOLinkingContext::OS::macOSX; + if (MachOLinkingContext::parsePackedVersion(minOS->getValue(), + minOSVersion)) { + diagnostics << "error: malformed macosx_version_min value\n"; + return false; + } + break; + case OPT_ios_version_min: + os = MachOLinkingContext::OS::iOS; + if (MachOLinkingContext::parsePackedVersion(minOS->getValue(), + minOSVersion)) { + diagnostics << "error: malformed ios_version_min value\n"; + return false; + } + break; + 
case OPT_ios_simulator_version_min: + os = MachOLinkingContext::OS::iOS_simulator; + if (MachOLinkingContext::parsePackedVersion(minOS->getValue(), + minOSVersion)) { + diagnostics << "error: malformed ios_simulator_version_min value\n"; + return false; + } + break; + } + } else { + // No min-os version on command line, check environment variables + } + + // Handle export_dynamic + // FIXME: Should we warn when this applies to something other than a static + // executable or dylib? Those are the only cases where this has an effect. + // Note, this has to come before ctx.configure() so that we get the correct + // value for _globalsAreDeadStripRoots. + bool exportDynamicSymbols = parsedArgs.hasArg(OPT_export_dynamic); + + // Now that there's enough information parsed in, let the linking context + // set up default values. + ctx.configure(fileType, arch, os, minOSVersion, exportDynamicSymbols); + + // Handle -e xxx + if (llvm::opt::Arg *entry = parsedArgs.getLastArg(OPT_entry)) + ctx.setEntrySymbolName(entry->getValue()); + + // Handle -o xxx + if (llvm::opt::Arg *outpath = parsedArgs.getLastArg(OPT_output)) + ctx.setOutputPath(outpath->getValue()); + else + ctx.setOutputPath("a.out"); + + // Handle -image_base XXX and -seg1addr XXXX + if (llvm::opt::Arg *imageBase = parsedArgs.getLastArg(OPT_image_base)) { + uint64_t baseAddress; + if (parseNumberBase16(imageBase->getValue(), baseAddress)) { + diagnostics << "error: image_base expects a hex number\n"; + return false; + } else if (baseAddress < ctx.pageZeroSize()) { + diagnostics << "error: image_base overlaps with __PAGEZERO\n"; + return false; + } else if (baseAddress % ctx.pageSize()) { + diagnostics << "error: image_base must be a multiple of page size (" + << "0x" << llvm::utohexstr(ctx.pageSize()) << ")\n"; + return false; + } + + ctx.setBaseAddress(baseAddress); + } + + // Handle -dead_strip + if (parsedArgs.getLastArg(OPT_dead_strip)) + ctx.setDeadStripping(true); + + bool globalWholeArchive = false; + // 
Handle -all_load + if (parsedArgs.getLastArg(OPT_all_load)) + globalWholeArchive = true; + + // Handle -install_name + if (llvm::opt::Arg *installName = parsedArgs.getLastArg(OPT_install_name)) + ctx.setInstallName(installName->getValue()); + else + ctx.setInstallName(ctx.outputPath()); + + // Handle -mark_dead_strippable_dylib + if (parsedArgs.getLastArg(OPT_mark_dead_strippable_dylib)) + ctx.setDeadStrippableDylib(true); + + // Handle -compatibility_version and -current_version + if (llvm::opt::Arg *vers = parsedArgs.getLastArg(OPT_compatibility_version)) { + if (ctx.outputMachOType() != llvm::MachO::MH_DYLIB) { + diagnostics + << "error: -compatibility_version can only be used with -dylib\n"; + return false; + } + uint32_t parsedVers; + if (MachOLinkingContext::parsePackedVersion(vers->getValue(), parsedVers)) { + diagnostics << "error: -compatibility_version value is malformed\n"; + return false; + } + ctx.setCompatibilityVersion(parsedVers); + } + + if (llvm::opt::Arg *vers = parsedArgs.getLastArg(OPT_current_version)) { + if (ctx.outputMachOType() != llvm::MachO::MH_DYLIB) { + diagnostics << "-current_version can only be used with -dylib\n"; + return false; + } + uint32_t parsedVers; + if (MachOLinkingContext::parsePackedVersion(vers->getValue(), parsedVers)) { + diagnostics << "error: -current_version value is malformed\n"; + return false; + } + ctx.setCurrentVersion(parsedVers); + } + + // Handle -bundle_loader + if (llvm::opt::Arg *loader = parsedArgs.getLastArg(OPT_bundle_loader)) + ctx.setBundleLoader(loader->getValue()); + + // Handle -sectalign segname sectname align + for (auto &alignArg : parsedArgs.filtered(OPT_sectalign)) { + const char* segName = alignArg->getValue(0); + const char* sectName = alignArg->getValue(1); + const char* alignStr = alignArg->getValue(2); + if ((alignStr[0] == '0') && (alignStr[1] == 'x')) + alignStr += 2; + unsigned long long alignValue; + if (llvm::getAsUnsignedInteger(alignStr, 16, alignValue)) { + diagnostics << 
"error: -sectalign alignment value '" + << alignStr << "' not a valid number\n"; + return false; + } + uint16_t align = 1 << llvm::countTrailingZeros(alignValue); + if (!llvm::isPowerOf2_64(alignValue)) { + diagnostics << "warning: alignment for '-sectalign " + << segName << " " << sectName + << llvm::format(" 0x%llX", alignValue) + << "' is not a power of two, using " + << llvm::format("0x%08X", align) << "\n"; + } + ctx.addSectionAlignment(segName, sectName, align); + } + + // Handle -mllvm + for (auto &llvmArg : parsedArgs.filtered(OPT_mllvm)) { + ctx.appendLLVMOption(llvmArg->getValue()); + } + + // Handle -print_atoms + if (parsedArgs.getLastArg(OPT_print_atoms)) + ctx.setPrintAtoms(); + + // Handle -t (trace) option. + if (parsedArgs.getLastArg(OPT_t)) + ctx.setLogInputFiles(true); + + // Handle -demangle option. + if (parsedArgs.getLastArg(OPT_demangle)) + ctx.setDemangleSymbols(true); + + // Handle -keep_private_externs + if (parsedArgs.getLastArg(OPT_keep_private_externs)) { + ctx.setKeepPrivateExterns(true); + if (ctx.outputMachOType() != llvm::MachO::MH_OBJECT) + diagnostics << "warning: -keep_private_externs only used in -r mode\n"; + } + + // Handle -dependency_info <path> used by Xcode. + if (llvm::opt::Arg *depInfo = parsedArgs.getLastArg(OPT_dependency_info)) { + if (std::error_code ec = ctx.createDependencyFile(depInfo->getValue())) { + diagnostics << "warning: " << ec.message() + << ", processing '-dependency_info " + << depInfo->getValue() + << "'\n"; + } + } + + // In -test_file_usage mode, we'll be given an explicit list of paths that + // exist. We'll also be expected to print out information about how we located + // libraries and so on that the user specified, but not to actually do any + // linking. + if (parsedArgs.getLastArg(OPT_test_file_usage)) { + ctx.setTestingFileUsage(); + + // With paths existing by fiat, linking is not going to end well. 
+ ctx.setDoNothing(true); + + // Only bother looking for an existence override if we're going to use it. + for (auto existingPath : parsedArgs.filtered(OPT_path_exists)) { + ctx.addExistingPathForDebug(existingPath->getValue()); + } + } + + // Register possible input file parsers. + if (!ctx.doNothing()) { + ctx.registry().addSupportMachOObjects(ctx); + ctx.registry().addSupportArchives(ctx.logInputFiles()); + ctx.registry().addSupportYamlFiles(); + } + + // Now construct the set of library search directories, following ld64's + // baroque set of accumulated hacks. Mostly, the algorithm constructs + // { syslibroots } x { libpaths } + // + // Unfortunately, there are numerous exceptions: + // 1. Only absolute paths get modified by syslibroot options. + // 2. If there is just 1 -syslibroot, system paths not found in it are + // skipped. + // 3. If the last -syslibroot is "/", all of them are ignored entirely. + // 4. If { syslibroots } x path == {}, the original path is kept. + std::vector<StringRef> sysLibRoots; + for (auto syslibRoot : parsedArgs.filtered(OPT_syslibroot)) { + sysLibRoots.push_back(syslibRoot->getValue()); + } + if (!sysLibRoots.empty()) { + // Ignore all if last -syslibroot is "/". + if (sysLibRoots.back() != "/") + ctx.setSysLibRoots(sysLibRoots); + } + + // Paths specified with -L come first, and are not considered system paths for + // the case where there is precisely 1 -syslibroot. + for (auto libPath : parsedArgs.filtered(OPT_L)) { + ctx.addModifiedSearchDir(libPath->getValue()); + } + + // Process -F directories (where to look for frameworks). + for (auto fwPath : parsedArgs.filtered(OPT_F)) { + ctx.addFrameworkSearchDir(fwPath->getValue()); + } + + // -Z suppresses the standard search paths. 
+ if (!parsedArgs.hasArg(OPT_Z)) { + ctx.addModifiedSearchDir("/usr/lib", true); + ctx.addModifiedSearchDir("/usr/local/lib", true); + ctx.addFrameworkSearchDir("/Library/Frameworks", true); + ctx.addFrameworkSearchDir("/System/Library/Frameworks", true); + } + + // Now that we've constructed the final set of search paths, print out those + // search paths in verbose mode. + if (parsedArgs.getLastArg(OPT_v)) { + diagnostics << "Library search paths:\n"; + for (auto path : ctx.searchDirs()) { + diagnostics << " " << path << '\n'; + } + diagnostics << "Framework search paths:\n"; + for (auto path : ctx.frameworkDirs()) { + diagnostics << " " << path << '\n'; + } + } + + // Handle -exported_symbols_list <file> + for (auto expFile : parsedArgs.filtered(OPT_exported_symbols_list)) { + if (ctx.exportMode() == MachOLinkingContext::ExportMode::blackList) { + diagnostics << "error: -exported_symbols_list cannot be combined " + << "with -unexported_symbol[s_list]\n"; + return false; + } + ctx.setExportMode(MachOLinkingContext::ExportMode::whiteList); + if (std::error_code ec = parseExportsList(expFile->getValue(), ctx, + diagnostics)) { + diagnostics << "error: " << ec.message() + << ", processing '-exported_symbols_list " + << expFile->getValue() + << "'\n"; + return false; + } + } + + // Handle -exported_symbol <symbol> + for (auto symbol : parsedArgs.filtered(OPT_exported_symbol)) { + if (ctx.exportMode() == MachOLinkingContext::ExportMode::blackList) { + diagnostics << "error: -exported_symbol cannot be combined " + << "with -unexported_symbol[s_list]\n"; + return false; + } + ctx.setExportMode(MachOLinkingContext::ExportMode::whiteList); + ctx.addExportSymbol(symbol->getValue()); + } + + // Handle -unexported_symbols_list <file> + for (auto expFile : parsedArgs.filtered(OPT_unexported_symbols_list)) { + if (ctx.exportMode() == MachOLinkingContext::ExportMode::whiteList) { + diagnostics << "error: -unexported_symbols_list cannot be combined " + << "with 
-exported_symbol[s_list]\n"; + return false; + } + ctx.setExportMode(MachOLinkingContext::ExportMode::blackList); + if (std::error_code ec = parseExportsList(expFile->getValue(), ctx, + diagnostics)) { + diagnostics << "error: " << ec.message() + << ", processing '-unexported_symbols_list " + << expFile->getValue() + << "'\n"; + return false; + } + } + + // Handle -unexported_symbol <symbol> + for (auto symbol : parsedArgs.filtered(OPT_unexported_symbol)) { + if (ctx.exportMode() == MachOLinkingContext::ExportMode::whiteList) { + diagnostics << "error: -unexported_symbol cannot be combined " + << "with -exported_symbol[s_list]\n"; + return false; + } + ctx.setExportMode(MachOLinkingContext::ExportMode::blackList); + ctx.addExportSymbol(symbol->getValue()); + } + + // Handle obosolete -multi_module and -single_module + if (llvm::opt::Arg *mod = + parsedArgs.getLastArg(OPT_multi_module, OPT_single_module)) { + if (mod->getOption().getID() == OPT_multi_module) { + diagnostics << "warning: -multi_module is obsolete and being ignored\n"; + } + else { + if (ctx.outputMachOType() != llvm::MachO::MH_DYLIB) { + diagnostics << "warning: -single_module being ignored. 
" + "It is only for use when producing a dylib\n"; + } + } + } + + // Handle obsolete ObjC options: -objc_gc_compaction, -objc_gc, -objc_gc_only + if (parsedArgs.getLastArg(OPT_objc_gc_compaction)) { + diagnostics << "error: -objc_gc_compaction is not supported\n"; + return false; + } + + if (parsedArgs.getLastArg(OPT_objc_gc)) { + diagnostics << "error: -objc_gc is not supported\n"; + return false; + } + + if (parsedArgs.getLastArg(OPT_objc_gc_only)) { + diagnostics << "error: -objc_gc_only is not supported\n"; + return false; + } + + // Handle -pie or -no_pie + if (llvm::opt::Arg *pie = parsedArgs.getLastArg(OPT_pie, OPT_no_pie)) { + switch (ctx.outputMachOType()) { + case llvm::MachO::MH_EXECUTE: + switch (ctx.os()) { + case MachOLinkingContext::OS::macOSX: + if ((minOSVersion < 0x000A0500) && + (pie->getOption().getID() == OPT_pie)) { + diagnostics << "-pie can only be used when targeting " + "Mac OS X 10.5 or later\n"; + return false; + } + break; + case MachOLinkingContext::OS::iOS: + if ((minOSVersion < 0x00040200) && + (pie->getOption().getID() == OPT_pie)) { + diagnostics << "-pie can only be used when targeting " + "iOS 4.2 or later\n"; + return false; + } + break; + case MachOLinkingContext::OS::iOS_simulator: + if (pie->getOption().getID() == OPT_no_pie) + diagnostics << "iOS simulator programs must be built PIE\n"; + return false; + break; + case MachOLinkingContext::OS::unknown: + break; + } + ctx.setPIE(pie->getOption().getID() == OPT_pie); + break; + case llvm::MachO::MH_PRELOAD: + break; + case llvm::MachO::MH_DYLIB: + case llvm::MachO::MH_BUNDLE: + diagnostics << "warning: " << pie->getSpelling() << " being ignored. 
" + << "It is only used when linking main executables\n"; + break; + default: + diagnostics << pie->getSpelling() + << " can only used when linking main executables\n"; + return false; + break; + } + } + + // Handle -version_load_command or -no_version_load_command + { + bool flagOn = false; + bool flagOff = false; + if (auto *arg = parsedArgs.getLastArg(OPT_version_load_command, + OPT_no_version_load_command)) { + flagOn = arg->getOption().getID() == OPT_version_load_command; + flagOff = arg->getOption().getID() == OPT_no_version_load_command; + } + + // default to adding version load command for dynamic code, + // static code must opt-in + switch (ctx.outputMachOType()) { + case llvm::MachO::MH_OBJECT: + ctx.setGenerateVersionLoadCommand(false); + break; + case llvm::MachO::MH_EXECUTE: + // dynamic executables default to generating a version load command, + // while static exectuables only generate it if required. + if (isStaticExecutable) { + if (flagOn) + ctx.setGenerateVersionLoadCommand(true); + } else { + if (!flagOff) + ctx.setGenerateVersionLoadCommand(true); + } + break; + case llvm::MachO::MH_PRELOAD: + case llvm::MachO::MH_KEXT_BUNDLE: + if (flagOn) + ctx.setGenerateVersionLoadCommand(true); + break; + case llvm::MachO::MH_DYLINKER: + case llvm::MachO::MH_DYLIB: + case llvm::MachO::MH_BUNDLE: + if (!flagOff) + ctx.setGenerateVersionLoadCommand(true); + break; + case llvm::MachO::MH_FVMLIB: + case llvm::MachO::MH_DYLDLINK: + case llvm::MachO::MH_DYLIB_STUB: + case llvm::MachO::MH_DSYM: + // We don't generate load commands for these file types, even if + // forced on. 
+ break; + } + } + + // Handle -function_starts or -no_function_starts + { + bool flagOn = false; + bool flagOff = false; + if (auto *arg = parsedArgs.getLastArg(OPT_function_starts, + OPT_no_function_starts)) { + flagOn = arg->getOption().getID() == OPT_function_starts; + flagOff = arg->getOption().getID() == OPT_no_function_starts; + } + + // default to adding functions start for dynamic code, static code must + // opt-in + switch (ctx.outputMachOType()) { + case llvm::MachO::MH_OBJECT: + ctx.setGenerateFunctionStartsLoadCommand(false); + break; + case llvm::MachO::MH_EXECUTE: + // dynamic executables default to generating a version load command, + // while static exectuables only generate it if required. + if (isStaticExecutable) { + if (flagOn) + ctx.setGenerateFunctionStartsLoadCommand(true); + } else { + if (!flagOff) + ctx.setGenerateFunctionStartsLoadCommand(true); + } + break; + case llvm::MachO::MH_PRELOAD: + case llvm::MachO::MH_KEXT_BUNDLE: + if (flagOn) + ctx.setGenerateFunctionStartsLoadCommand(true); + break; + case llvm::MachO::MH_DYLINKER: + case llvm::MachO::MH_DYLIB: + case llvm::MachO::MH_BUNDLE: + if (!flagOff) + ctx.setGenerateFunctionStartsLoadCommand(true); + break; + case llvm::MachO::MH_FVMLIB: + case llvm::MachO::MH_DYLDLINK: + case llvm::MachO::MH_DYLIB_STUB: + case llvm::MachO::MH_DSYM: + // We don't generate load commands for these file types, even if + // forced on. 
+ break; + } + } + + // Handle -data_in_code_info or -no_data_in_code_info + { + bool flagOn = false; + bool flagOff = false; + if (auto *arg = parsedArgs.getLastArg(OPT_data_in_code_info, + OPT_no_data_in_code_info)) { + flagOn = arg->getOption().getID() == OPT_data_in_code_info; + flagOff = arg->getOption().getID() == OPT_no_data_in_code_info; + } + + // default to adding data in code for dynamic code, static code must + // opt-in + switch (ctx.outputMachOType()) { + case llvm::MachO::MH_OBJECT: + if (!flagOff) + ctx.setGenerateDataInCodeLoadCommand(true); + break; + case llvm::MachO::MH_EXECUTE: + // dynamic executables default to generating a version load command, + // while static exectuables only generate it if required. + if (isStaticExecutable) { + if (flagOn) + ctx.setGenerateDataInCodeLoadCommand(true); + } else { + if (!flagOff) + ctx.setGenerateDataInCodeLoadCommand(true); + } + break; + case llvm::MachO::MH_PRELOAD: + case llvm::MachO::MH_KEXT_BUNDLE: + if (flagOn) + ctx.setGenerateDataInCodeLoadCommand(true); + break; + case llvm::MachO::MH_DYLINKER: + case llvm::MachO::MH_DYLIB: + case llvm::MachO::MH_BUNDLE: + if (!flagOff) + ctx.setGenerateDataInCodeLoadCommand(true); + break; + case llvm::MachO::MH_FVMLIB: + case llvm::MachO::MH_DYLDLINK: + case llvm::MachO::MH_DYLIB_STUB: + case llvm::MachO::MH_DSYM: + // We don't generate load commands for these file types, even if + // forced on. + break; + } + } + + // Handle sdk_version + if (llvm::opt::Arg *arg = parsedArgs.getLastArg(OPT_sdk_version)) { + uint32_t sdkVersion = 0; + if (MachOLinkingContext::parsePackedVersion(arg->getValue(), + sdkVersion)) { + diagnostics << "error: malformed sdkVersion value\n"; + return false; + } + ctx.setSdkVersion(sdkVersion); + } else if (ctx.generateVersionLoadCommand()) { + // If we don't have an sdk version, but were going to emit a load command + // with min_version, then we need to give an warning as we have no sdk + // version to put in that command. 
+ // FIXME: We need to decide whether to make this an error. + diagnostics << "warning: -sdk_version is required when emitting " + "min version load command. " + "Setting sdk version to match provided min version\n"; + ctx.setSdkVersion(ctx.osMinVersion()); + } + + // Handle source_version + if (llvm::opt::Arg *arg = parsedArgs.getLastArg(OPT_source_version)) { + uint64_t version = 0; + if (MachOLinkingContext::parsePackedVersion(arg->getValue(), + version)) { + diagnostics << "error: malformed source_version value\n"; + return false; + } + ctx.setSourceVersion(version); + } + + // Handle stack_size + if (llvm::opt::Arg *stackSize = parsedArgs.getLastArg(OPT_stack_size)) { + uint64_t stackSizeVal; + if (parseNumberBase16(stackSize->getValue(), stackSizeVal)) { + diagnostics << "error: stack_size expects a hex number\n"; + return false; + } + if ((stackSizeVal % ctx.pageSize()) != 0) { + diagnostics << "error: stack_size must be a multiple of page size (" + << "0x" << llvm::utohexstr(ctx.pageSize()) << ")\n"; + return false; + } + + ctx.setStackSize(stackSizeVal); + } + + // Handle debug info handling options: -S + if (parsedArgs.hasArg(OPT_S)) + ctx.setDebugInfoMode(MachOLinkingContext::DebugInfoMode::noDebugMap); + + // Handle -order_file <file> + for (auto orderFile : parsedArgs.filtered(OPT_order_file)) { + if (std::error_code ec = parseOrderFile(orderFile->getValue(), ctx, + diagnostics)) { + diagnostics << "error: " << ec.message() + << ", processing '-order_file " + << orderFile->getValue() + << "'\n"; + return false; + } + } + + // Handle -flat_namespace. 
+ if (llvm::opt::Arg *ns = + parsedArgs.getLastArg(OPT_flat_namespace, OPT_twolevel_namespace)) { + if (ns->getOption().getID() == OPT_flat_namespace) + ctx.setUseFlatNamespace(true); + } + + // Handle -undefined + if (llvm::opt::Arg *undef = parsedArgs.getLastArg(OPT_undefined)) { + MachOLinkingContext::UndefinedMode UndefMode; + if (StringRef(undef->getValue()).equals("error")) + UndefMode = MachOLinkingContext::UndefinedMode::error; + else if (StringRef(undef->getValue()).equals("warning")) + UndefMode = MachOLinkingContext::UndefinedMode::warning; + else if (StringRef(undef->getValue()).equals("suppress")) + UndefMode = MachOLinkingContext::UndefinedMode::suppress; + else if (StringRef(undef->getValue()).equals("dynamic_lookup")) + UndefMode = MachOLinkingContext::UndefinedMode::dynamicLookup; + else { + diagnostics << "error: invalid option to -undefined " + "[ warning | error | suppress | dynamic_lookup ]\n"; + return false; + } + + if (ctx.useFlatNamespace()) { + // If we're using -flat_namespace then 'warning', 'suppress' and + // 'dynamic_lookup' are all equivalent, so map them to 'suppress'. + if (UndefMode != MachOLinkingContext::UndefinedMode::error) + UndefMode = MachOLinkingContext::UndefinedMode::suppress; + } else { + // If we're using -twolevel_namespace then 'warning' and 'suppress' are + // illegal. Emit a diagnostic if they've been (mis)used. + if (UndefMode == MachOLinkingContext::UndefinedMode::warning || + UndefMode == MachOLinkingContext::UndefinedMode::suppress) { + diagnostics << "error: can't use -undefined warning or suppress with " + "-twolevel_namespace\n"; + return false; + } + } + + ctx.setUndefinedMode(UndefMode); + } + + // Handle -no_objc_category_merging. 
+ if (parsedArgs.getLastArg(OPT_no_objc_category_merging)) + ctx.setMergeObjCCategories(false); + + // Handle -rpath <path> + if (parsedArgs.hasArg(OPT_rpath)) { + switch (ctx.outputMachOType()) { + case llvm::MachO::MH_EXECUTE: + case llvm::MachO::MH_DYLIB: + case llvm::MachO::MH_BUNDLE: + if (!ctx.minOS("10.5", "2.0")) { + if (ctx.os() == MachOLinkingContext::OS::macOSX) { + diagnostics << "error: -rpath can only be used when targeting " + "OS X 10.5 or later\n"; + } else { + diagnostics << "error: -rpath can only be used when targeting " + "iOS 2.0 or later\n"; + } + return false; + } + break; + default: + diagnostics << "error: -rpath can only be used when creating " + "a dynamic final linked image\n"; + return false; + } + + for (auto rPath : parsedArgs.filtered(OPT_rpath)) { + ctx.addRpath(rPath->getValue()); + } + } + + // Parse the LLVM options before we process files in case the file handling + // makes use of things like DEBUG(). + parseLLVMOptions(ctx); + + // Handle input files and sectcreate. + for (auto &arg : parsedArgs) { + bool upward; + llvm::Optional<StringRef> resolvedPath; + switch (arg->getOption().getID()) { + default: + continue; + case OPT_INPUT: + addFile(arg->getValue(), ctx, globalWholeArchive, false, diagnostics); + break; + case OPT_upward_library: + addFile(arg->getValue(), ctx, false, true, diagnostics); + break; + case OPT_force_load: + addFile(arg->getValue(), ctx, true, false, diagnostics); + break; + case OPT_l: + case OPT_upward_l: + upward = (arg->getOption().getID() == OPT_upward_l); + resolvedPath = ctx.searchLibrary(arg->getValue()); + if (!resolvedPath) { + diagnostics << "Unable to find library for " << arg->getSpelling() + << arg->getValue() << "\n"; + return false; + } else if (ctx.testingFileUsage()) { + diagnostics << "Found " << (upward ? 
"upward " : " ") << "library " + << canonicalizePath(resolvedPath.getValue()) << '\n'; + } + addFile(resolvedPath.getValue(), ctx, globalWholeArchive, + upward, diagnostics); + break; + case OPT_framework: + case OPT_upward_framework: + upward = (arg->getOption().getID() == OPT_upward_framework); + resolvedPath = ctx.findPathForFramework(arg->getValue()); + if (!resolvedPath) { + diagnostics << "Unable to find framework for " + << arg->getSpelling() << " " << arg->getValue() << "\n"; + return false; + } else if (ctx.testingFileUsage()) { + diagnostics << "Found " << (upward ? "upward " : " ") << "framework " + << canonicalizePath(resolvedPath.getValue()) << '\n'; + } + addFile(resolvedPath.getValue(), ctx, globalWholeArchive, + upward, diagnostics); + break; + case OPT_filelist: + if (auto ec = loadFileList(arg->getValue(), + ctx, globalWholeArchive, + diagnostics)) { + handleAllErrors(std::move(ec), [&](const llvm::ErrorInfoBase &EI) { + diagnostics << "error: "; + EI.log(diagnostics); + diagnostics << ", processing '-filelist " << arg->getValue() << "'\n"; + }); + return false; + } + break; + case OPT_sectcreate: { + const char* seg = arg->getValue(0); + const char* sect = arg->getValue(1); + const char* fileName = arg->getValue(2); + + ErrorOr<std::unique_ptr<MemoryBuffer>> contentOrErr = + MemoryBuffer::getFile(fileName); + + if (!contentOrErr) { + diagnostics << "error: can't open -sectcreate file " << fileName << "\n"; + return false; + } + + ctx.addSectCreateSection(seg, sect, std::move(*contentOrErr)); + } + break; + } + } + + if (ctx.getNodes().empty()) { + diagnostics << "No input files\n"; + return false; + } + + // Validate the combination of options used. + return ctx.validate(diagnostics); +} + +/// This is where the link is actually performed. 
+bool link(llvm::ArrayRef<const char *> args, raw_ostream &diagnostics) { + MachOLinkingContext ctx; + if (!parse(args, ctx, diagnostics)) + return false; + if (ctx.doNothing()) + return true; + if (ctx.getNodes().empty()) + return false; + + for (std::unique_ptr<Node> &ie : ctx.getNodes()) + if (FileNode *node = dyn_cast<FileNode>(ie.get())) + node->getFile()->parse(); + + std::vector<std::unique_ptr<File>> internalFiles; + ctx.createInternalFiles(internalFiles); + for (auto i = internalFiles.rbegin(), e = internalFiles.rend(); i != e; ++i) { + auto &members = ctx.getNodes(); + members.insert(members.begin(), llvm::make_unique<FileNode>(std::move(*i))); + } + + // Give target a chance to add files. + std::vector<std::unique_ptr<File>> implicitFiles; + ctx.createImplicitFiles(implicitFiles); + for (auto i = implicitFiles.rbegin(), e = implicitFiles.rend(); i != e; ++i) { + auto &members = ctx.getNodes(); + members.insert(members.begin(), llvm::make_unique<FileNode>(std::move(*i))); + } + + // Give target a chance to postprocess input files. + // Mach-O uses this chance to move all object files before library files. + ctx.finalizeInputFiles(); + + // Do core linking. + ScopedTask resolveTask(getDefaultDomain(), "Resolve"); + Resolver resolver(ctx); + if (!resolver.resolve()) + return false; + SimpleFile *merged = nullptr; + { + std::unique_ptr<SimpleFile> mergedFile = resolver.resultFile(); + merged = mergedFile.get(); + auto &members = ctx.getNodes(); + members.insert(members.begin(), + llvm::make_unique<FileNode>(std::move(mergedFile))); + } + resolveTask.end(); + + // Run passes on linked atoms. + ScopedTask passTask(getDefaultDomain(), "Passes"); + PassManager pm; + ctx.addPasses(pm); + if (auto ec = pm.runOnFile(*merged)) { + // FIXME: This should be passed to logAllUnhandledErrors but it needs + // to be passed a Twine instead of a string. 
+ diagnostics << "Failed to run passes on file '" << ctx.outputPath() + << "': "; + logAllUnhandledErrors(std::move(ec), diagnostics, std::string()); + return false; + } + + passTask.end(); + + // Give linked atoms to Writer to generate output file. + ScopedTask writeTask(getDefaultDomain(), "Write"); + if (auto ec = ctx.writeFile(*merged)) { + // FIXME: This should be passed to logAllUnhandledErrors but it needs + // to be passed a Twine instead of a string. + diagnostics << "Failed to write file '" << ctx.outputPath() << "': "; + logAllUnhandledErrors(std::move(ec), diagnostics, std::string()); + return false; + } + + return true; +} +} // namespace mach_o +} // namespace lld diff --git a/gnu/llvm/tools/lld/lib/Driver/DarwinLdOptions.td b/gnu/llvm/tools/lld/lib/Driver/DarwinLdOptions.td new file mode 100644 index 00000000000..fa07f33646e --- /dev/null +++ b/gnu/llvm/tools/lld/lib/Driver/DarwinLdOptions.td @@ -0,0 +1,242 @@ +include "llvm/Option/OptParser.td" + + +// output kinds +def grp_kind : OptionGroup<"outs">, HelpText<"OUTPUT KIND">; +def relocatable : Flag<["-"], "r">, + HelpText<"Create relocatable object file">, Group<grp_kind>; +def static : Flag<["-"], "static">, + HelpText<"Create static executable">, Group<grp_kind>; +def dynamic : Flag<["-"], "dynamic">, + HelpText<"Create dynamic executable (default)">,Group<grp_kind>; +def dylib : Flag<["-"], "dylib">, + HelpText<"Create dynamic library">, Group<grp_kind>; +def bundle : Flag<["-"], "bundle">, + HelpText<"Create dynamic bundle">, Group<grp_kind>; +def execute : Flag<["-"], "execute">, + HelpText<"Create main executable (default)">, Group<grp_kind>; +def preload : Flag<["-"], "preload">, + HelpText<"Create binary for use with embedded systems">, Group<grp_kind>; + +// optimizations +def grp_opts : OptionGroup<"opts">, HelpText<"OPTIMIZATIONS">; +def dead_strip : Flag<["-"], "dead_strip">, + HelpText<"Remove unreference code and data">, Group<grp_opts>; +def macosx_version_min : Separate<["-"], 
"macosx_version_min">, + MetaVarName<"<version>">, + HelpText<"Minimum Mac OS X version">, Group<grp_opts>; +def ios_version_min : Separate<["-"], "ios_version_min">, + MetaVarName<"<version>">, + HelpText<"Minimum iOS version">, Group<grp_opts>; +def iphoneos_version_min : Separate<["-"], "iphoneos_version_min">, + Alias<ios_version_min>; +def ios_simulator_version_min : Separate<["-"], "ios_simulator_version_min">, + MetaVarName<"<version>">, + HelpText<"Minimum iOS simulator version">, Group<grp_opts>; +def sdk_version : Separate<["-"], "sdk_version">, + MetaVarName<"<version>">, + HelpText<"SDK version">, Group<grp_opts>; +def source_version : Separate<["-"], "source_version">, + MetaVarName<"<version>">, + HelpText<"Source version">, Group<grp_opts>; +def version_load_command : Flag<["-"], "version_load_command">, + HelpText<"Force generation of a version load command">, Group<grp_opts>; +def no_version_load_command : Flag<["-"], "no_version_load_command">, + HelpText<"Disable generation of a version load command">, Group<grp_opts>; +def function_starts : Flag<["-"], "function_starts">, + HelpText<"Force generation of a function starts load command">, + Group<grp_opts>; +def no_function_starts : Flag<["-"], "no_function_starts">, + HelpText<"Disable generation of a function starts load command">, + Group<grp_opts>; +def data_in_code_info : Flag<["-"], "data_in_code_info">, + HelpText<"Force generation of a data in code load command">, + Group<grp_opts>; +def no_data_in_code_info : Flag<["-"], "no_data_in_code_info">, + HelpText<"Disable generation of a data in code load command">, + Group<grp_opts>; +def mllvm : Separate<["-"], "mllvm">, + MetaVarName<"<option>">, + HelpText<"Options to pass to LLVM during LTO">, Group<grp_opts>; +def exported_symbols_list : Separate<["-"], "exported_symbols_list">, + MetaVarName<"<file-path>">, + HelpText<"Restricts which symbols will be exported">, Group<grp_opts>; +def exported_symbol : Separate<["-"], "exported_symbol">, + 
MetaVarName<"<symbol>">, + HelpText<"Restricts which symbols will be exported">, Group<grp_opts>; +def unexported_symbols_list : Separate<["-"], "unexported_symbols_list">, + MetaVarName<"<file-path>">, + HelpText<"Lists symbols that should not be exported">, Group<grp_opts>; +def unexported_symbol : Separate<["-"], "unexported_symbol">, + MetaVarName<"<symbol>">, + HelpText<"A symbol which should not be exported">, Group<grp_opts>; +def keep_private_externs : Flag<["-"], "keep_private_externs">, + HelpText<"Private extern (hidden) symbols should not be transformed " + "into local symbols">, Group<grp_opts>; +def order_file : Separate<["-"], "order_file">, + MetaVarName<"<file-path>">, + HelpText<"re-order and move specified symbols to start of their section">, + Group<grp_opts>; +def flat_namespace : Flag<["-"], "flat_namespace">, + HelpText<"Resolves symbols in any (transitively) linked dynamic libraries. " + "Source libraries are not recorded: dyld will re-search all " + "images at runtime and use the first definition found.">, + Group<grp_opts>; +def twolevel_namespace : Flag<["-"], "twolevel_namespace">, + HelpText<"Resolves symbols in listed libraries only. 
Source libraries are " + "recorded in the symbol table.">, + Group<grp_opts>; +def undefined : Separate<["-"], "undefined">, + MetaVarName<"<undefined>">, + HelpText<"Determines how undefined symbols are handled.">, + Group<grp_opts>; +def no_objc_category_merging : Flag<["-"], "no_objc_category_merging">, + HelpText<"Disables the optimisation which merges Objective-C categories " + "on a class in to the class itself.">, + Group<grp_opts>; + +// main executable options +def grp_main : OptionGroup<"opts">, HelpText<"MAIN EXECUTABLE OPTIONS">; +def entry : Separate<["-"], "e">, + MetaVarName<"<entry-name>">, + HelpText<"entry symbol name">,Group<grp_main>; +def pie : Flag<["-"], "pie">, + HelpText<"Create Position Independent Executable (for ASLR)">, + Group<grp_main>; +def no_pie : Flag<["-"], "no_pie">, + HelpText<"Do not create Position Independent Executable">, + Group<grp_main>; +def stack_size : Separate<["-"], "stack_size">, + HelpText<"Specifies the maximum stack size for the main thread in a program. " + "Must be a page-size multiple. 
(default=8Mb)">, + Group<grp_main>; +def export_dynamic : Flag<["-"], "export_dynamic">, + HelpText<"Preserves all global symbols in main executables during LTO">, + Group<grp_main>; + +// dylib executable options +def grp_dylib : OptionGroup<"opts">, HelpText<"DYLIB EXECUTABLE OPTIONS">; +def install_name : Separate<["-"], "install_name">, + MetaVarName<"<path>">, + HelpText<"The dylib's install name">, Group<grp_dylib>; +def mark_dead_strippable_dylib : Flag<["-"], "mark_dead_strippable_dylib">, + HelpText<"Marks the dylib as having no side effects during initialization">, + Group<grp_dylib>; +def compatibility_version : Separate<["-"], "compatibility_version">, + MetaVarName<"<version>">, + HelpText<"The dylib's compatibility version">, Group<grp_dylib>; +def current_version : Separate<["-"], "current_version">, + MetaVarName<"<version>">, + HelpText<"The dylib's current version">, Group<grp_dylib>; + +// dylib executable options - compatibility aliases +def dylib_install_name : Separate<["-"], "dylib_install_name">, + Alias<install_name>; +def dylib_compatibility_version : Separate<["-"], "dylib_compatibility_version">, + MetaVarName<"<version>">, Alias<compatibility_version>; +def dylib_current_version : Separate<["-"], "dylib_current_version">, + MetaVarName<"<version>">, Alias<current_version>; + +// bundle executable options +def grp_bundle : OptionGroup<"opts">, HelpText<"BUNDLE EXECUTABLE OPTIONS">; +def bundle_loader : Separate<["-"], "bundle_loader">, + MetaVarName<"<path>">, + HelpText<"The executable that will be loading this Mach-O bundle">, + Group<grp_bundle>; + +// library options +def grp_libs : OptionGroup<"libs">, HelpText<"LIBRARY OPTIONS">; +def L : JoinedOrSeparate<["-"], "L">, + MetaVarName<"<dir>">, + HelpText<"Add directory to library search path">, Group<grp_libs>; +def F : JoinedOrSeparate<["-"], "F">, + MetaVarName<"<dir>">, + HelpText<"Add directory to framework search path">, Group<grp_libs>; +def Z : Flag<["-"], "Z">, + HelpText<"Do 
not search standard directories for libraries or frameworks">; +def all_load : Flag<["-"], "all_load">, + HelpText<"Forces all members of all static libraries to be loaded">, + Group<grp_libs>; +def force_load : Separate<["-"], "force_load">, + MetaVarName<"<library-path>">, + HelpText<"Forces all members of specified static libraries to be loaded">, + Group<grp_libs>; +def syslibroot : Separate<["-"], "syslibroot">, MetaVarName<"<dir>">, + HelpText<"Add path to SDK to all absolute library search paths">, + Group<grp_libs>; + +// Input options +def l : Joined<["-"], "l">, + MetaVarName<"<libname>">, + HelpText<"Base name of library searched for in -L directories">; +def upward_l : Joined<["-"], "upward-l">, + MetaVarName<"<libname>">, + HelpText<"Base name of upward library searched for in -L directories">; +def framework : Separate<["-"], "framework">, + MetaVarName<"<name>">, + HelpText<"Base name of framework searched for in -F directories">; +def upward_framework : Separate<["-"], "upward_framework">, + MetaVarName<"<name>">, + HelpText<"Base name of upward framework searched for in -F directories">; +def upward_library : Separate<["-"], "upward_library">, + MetaVarName<"<path>">, + HelpText<"path to upward dylib to link with">; +def filelist : Separate<["-"], "filelist">, + MetaVarName<"<path>">, + HelpText<"file containing paths to input files">; + + +// test case options +def print_atoms : Flag<["-"], "print_atoms">, + HelpText<"Emit output as yaml atoms">; +def test_file_usage : Flag<["-"], "test_file_usage">, + HelpText<"Only files specified by -file_exists are considered to exist. 
" + "Print which files would be used">; +def path_exists : Separate<["-"], "path_exists">, + MetaVarName<"<path>">, + HelpText<"Used with -test_file_usage to declare a path">; + + +// general options +def output : Separate<["-"], "o">, + MetaVarName<"<path>">, + HelpText<"Output file path">; +def arch : Separate<["-"], "arch">, + MetaVarName<"<arch-name>">, + HelpText<"Architecture to link">; +def sectalign : MultiArg<["-"], "sectalign", 3>, + MetaVarName<"<segname> <sectname> <alignment>">, + HelpText<"Alignment for segment/section">; +def sectcreate : MultiArg<["-"], "sectcreate", 3>, + MetaVarName<"<segname> <sectname> <file>">, + HelpText<"Create section <segname>/<sectname> from contents of <file>">; +def image_base : Separate<["-"], "image_base">; +def seg1addr : Separate<["-"], "seg1addr">, Alias<image_base>; +def demangle : Flag<["-"], "demangle">, + HelpText<"Demangles symbol names in errors and warnings">; +def dependency_info : Separate<["-"], "dependency_info">, + MetaVarName<"<file>">, + HelpText<"Write binary list of files used during link">; +def S : Flag<["-"], "S">, + HelpText<"Remove debug information (STABS or DWARF) from the output file">; +def rpath : Separate<["-"], "rpath">, + MetaVarName<"<path>">, + HelpText<"Add path to the runpath search path list for image being created">; + +def t : Flag<["-"], "t">, + HelpText<"Print the names of the input files as ld processes them">; +def v : Flag<["-"], "v">, + HelpText<"Print linker information">; + +// Obsolete options +def grp_obsolete : OptionGroup<"obsolete">, HelpText<"OBSOLETE OPTIONS">; +def single_module : Flag<["-"], "single_module">, + HelpText<"Default for dylibs">, Group<grp_obsolete>; +def multi_module : Flag<["-"], "multi_module">, + HelpText<"Unsupported way to build dylibs">, Group<grp_obsolete>; +def objc_gc_compaction : Flag<["-"], "objc_gc_compaction">, + HelpText<"Unsupported ObjC GC option">, Group<grp_obsolete>; +def objc_gc : Flag<["-"], "objc_gc">, + HelpText<"Unsupported 
ObjC GC option">, Group<grp_obsolete>; +def objc_gc_only : Flag<["-"], "objc_gc_only">, + HelpText<"Unsupported ObjC GC option">, Group<grp_obsolete>; diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/CMakeLists.txt b/gnu/llvm/tools/lld/lib/ReaderWriter/CMakeLists.txt new file mode 100644 index 00000000000..4408d9c18b8 --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/CMakeLists.txt @@ -0,0 +1,19 @@ +add_subdirectory(MachO) +add_subdirectory(YAML) + +if (MSVC) + add_definitions(-wd4062) # Suppress 'warning C4062: Enumerator has no associated handler in a switch statement.' +endif() + +add_lld_library(lldReaderWriter + FileArchive.cpp + + ADDITIONAL_HEADER_DIRS + ${LLD_INCLUDE_DIR}/lld/ReaderWriter + + LINK_LIBS + lldCore + lldYAML + LLVMObject + LLVMSupport + ) diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/FileArchive.cpp b/gnu/llvm/tools/lld/lib/ReaderWriter/FileArchive.cpp new file mode 100644 index 00000000000..eb7e7fb1837 --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/FileArchive.cpp @@ -0,0 +1,222 @@ +//===- lib/ReaderWriter/FileArchive.cpp -----------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "lld/Core/ArchiveLibraryFile.h" +#include "lld/Core/File.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/Reader.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Object/Archive.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Object/Error.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/raw_ostream.h" +#include <memory> +#include <set> +#include <string> +#include <system_error> +#include <unordered_map> +#include <utility> +#include <vector> + +using llvm::object::Archive; + +namespace lld { + +namespace { + +/// \brief The FileArchive class represents an Archive Library file +class FileArchive : public lld::ArchiveLibraryFile { +public: + FileArchive(std::unique_ptr<MemoryBuffer> mb, const Registry ®, + StringRef path, bool logLoading) + : ArchiveLibraryFile(path), _mb(std::shared_ptr<MemoryBuffer>(mb.release())), + _registry(reg), _logLoading(logLoading) {} + + /// \brief Check if any member of the archive contains an Atom with the + /// specified name and return the File object for that member, or nullptr. + File *find(StringRef name) override { + auto member = _symbolMemberMap.find(name); + if (member == _symbolMemberMap.end()) + return nullptr; + Archive::Child c = member->second; + + // Don't return a member already returned + ErrorOr<StringRef> buf = c.getBuffer(); + if (!buf) + return nullptr; + const char *memberStart = buf->data(); + if (_membersInstantiated.count(memberStart)) + return nullptr; + _membersInstantiated.insert(memberStart); + + std::unique_ptr<File> result; + if (instantiateMember(c, result)) + return nullptr; + + File *file = result.get(); + _filesReturned.push_back(std::move(result)); + + // Give up the file pointer. 
It was stored and will be destroyed with destruction of FileArchive + return file; + } + + /// \brief parse each member + std::error_code + parseAllMembers(std::vector<std::unique_ptr<File>> &result) override { + if (std::error_code ec = parse()) + return ec; + llvm::Error err; + for (auto mf = _archive->child_begin(err), me = _archive->child_end(); + mf != me; ++mf) { + std::unique_ptr<File> file; + if (std::error_code ec = instantiateMember(*mf, file)) { + // err is Success (or we wouldn't be in the loop body) but we can't + // return without testing or consuming it. + consumeError(std::move(err)); + return ec; + } + result.push_back(std::move(file)); + } + if (err) + return errorToErrorCode(std::move(err)); + return std::error_code(); + } + + const AtomRange<DefinedAtom> defined() const override { + return _noDefinedAtoms; + } + + const AtomRange<UndefinedAtom> undefined() const override { + return _noUndefinedAtoms; + } + + const AtomRange<SharedLibraryAtom> sharedLibrary() const override { + return _noSharedLibraryAtoms; + } + + const AtomRange<AbsoluteAtom> absolute() const override { + return _noAbsoluteAtoms; + } + + void clearAtoms() override { + _noDefinedAtoms.clear(); + _noUndefinedAtoms.clear(); + _noSharedLibraryAtoms.clear(); + _noAbsoluteAtoms.clear(); + } + +protected: + std::error_code doParse() override { + // Make Archive object which will be owned by FileArchive object. 
+ llvm::Error Err; + _archive.reset(new Archive(_mb->getMemBufferRef(), Err)); + if (Err) + return errorToErrorCode(std::move(Err)); + std::error_code ec; + if ((ec = buildTableOfContents())) + return ec; + return std::error_code(); + } + +private: + std::error_code instantiateMember(Archive::Child member, + std::unique_ptr<File> &result) const { + ErrorOr<llvm::MemoryBufferRef> mbOrErr = member.getMemoryBufferRef(); + if (std::error_code ec = mbOrErr.getError()) + return ec; + llvm::MemoryBufferRef mb = mbOrErr.get(); + std::string memberPath = (_archive->getFileName() + "(" + + mb.getBufferIdentifier() + ")").str(); + + if (_logLoading) + llvm::errs() << memberPath << "\n"; + + std::unique_ptr<MemoryBuffer> memberMB(MemoryBuffer::getMemBuffer( + mb.getBuffer(), mb.getBufferIdentifier(), false)); + + ErrorOr<std::unique_ptr<File>> fileOrErr = + _registry.loadFile(std::move(memberMB)); + if (std::error_code ec = fileOrErr.getError()) + return ec; + result = std::move(fileOrErr.get()); + if (std::error_code ec = result->parse()) + return ec; + result->setArchivePath(_archive->getFileName()); + + // The memory buffer is co-owned by the archive file and the children, + // so that the bufffer is deallocated when all the members are destructed. 
+ result->setSharedMemoryBuffer(_mb); + return std::error_code(); + } + + std::error_code buildTableOfContents() { + DEBUG_WITH_TYPE("FileArchive", llvm::dbgs() + << "Table of contents for archive '" + << _archive->getFileName() << "':\n"); + for (const Archive::Symbol &sym : _archive->symbols()) { + StringRef name = sym.getName(); + ErrorOr<Archive::Child> memberOrErr = sym.getMember(); + if (std::error_code ec = memberOrErr.getError()) + return ec; + Archive::Child member = memberOrErr.get(); + DEBUG_WITH_TYPE("FileArchive", + llvm::dbgs() + << llvm::format("0x%08llX ", + member.getBuffer()->data()) + << "'" << name << "'\n"); + _symbolMemberMap.insert(std::make_pair(name, member)); + } + return std::error_code(); + } + + typedef std::unordered_map<StringRef, Archive::Child> MemberMap; + typedef std::set<const char *> InstantiatedSet; + + std::shared_ptr<MemoryBuffer> _mb; + const Registry &_registry; + std::unique_ptr<Archive> _archive; + MemberMap _symbolMemberMap; + InstantiatedSet _membersInstantiated; + bool _logLoading; + std::vector<std::unique_ptr<MemoryBuffer>> _memberBuffers; + std::vector<std::unique_ptr<File>> _filesReturned; +}; + +class ArchiveReader : public Reader { +public: + ArchiveReader(bool logLoading) : _logLoading(logLoading) {} + + bool canParse(file_magic magic, MemoryBufferRef) const override { + return magic == llvm::sys::fs::file_magic::archive; + } + + ErrorOr<std::unique_ptr<File>> loadFile(std::unique_ptr<MemoryBuffer> mb, + const Registry ®) const override { + StringRef path = mb->getBufferIdentifier(); + std::unique_ptr<File> ret = + llvm::make_unique<FileArchive>(std::move(mb), reg, path, _logLoading); + return std::move(ret); + } + +private: + bool _logLoading; +}; + +} // anonymous namespace + +void Registry::addSupportArchives(bool logLoading) { + add(std::unique_ptr<Reader>(new ArchiveReader(logLoading))); +} + +} // namespace lld diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler.cpp 
b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler.cpp new file mode 100644 index 00000000000..cb20907b3e3 --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler.cpp @@ -0,0 +1,172 @@ +//===- lib/FileFormat/MachO/ArchHandler.cpp -------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + +#include "ArchHandler.h" +#include "Atoms.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; + +namespace lld { +namespace mach_o { + + +ArchHandler::ArchHandler() { +} + +ArchHandler::~ArchHandler() { +} + +std::unique_ptr<mach_o::ArchHandler> ArchHandler::create( + MachOLinkingContext::Arch arch) { + switch (arch) { + case MachOLinkingContext::arch_x86_64: + return create_x86_64(); + case MachOLinkingContext::arch_x86: + return create_x86(); + case MachOLinkingContext::arch_armv6: + case MachOLinkingContext::arch_armv7: + case MachOLinkingContext::arch_armv7s: + return create_arm(); + case MachOLinkingContext::arch_arm64: + return create_arm64(); + default: + llvm_unreachable("Unknown arch"); + } +} + + +bool ArchHandler::isLazyPointer(const Reference &ref) { + // A lazy bind entry is needed for a lazy pointer. 
+ const StubInfo &info = stubInfo(); + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return false; + if (ref.kindArch() != info.lazyPointerReferenceToFinal.arch) + return false; + return (ref.kindValue() == info.lazyPointerReferenceToFinal.kind); +} + + +ArchHandler::RelocPattern ArchHandler::relocPattern(const Relocation &reloc) { + assert((reloc.type & 0xFFF0) == 0); + uint16_t result = reloc.type; + if (reloc.scattered) + result |= rScattered; + if (reloc.pcRel) + result |= rPcRel; + if (reloc.isExtern) + result |= rExtern; + switch(reloc.length) { + case 0: + break; + case 1: + result |= rLength2; + break; + case 2: + result |= rLength4; + break; + case 3: + result |= rLength8; + break; + default: + llvm_unreachable("bad r_length"); + } + return result; +} + +normalized::Relocation +ArchHandler::relocFromPattern(ArchHandler::RelocPattern pattern) { + normalized::Relocation result; + result.offset = 0; + result.scattered = (pattern & rScattered); + result.type = (RelocationInfoType)(pattern & 0xF); + result.pcRel = (pattern & rPcRel); + result.isExtern = (pattern & rExtern); + result.value = 0; + result.symbol = 0; + switch (pattern & 0x300) { + case rLength1: + result.length = 0; + break; + case rLength2: + result.length = 1; + break; + case rLength4: + result.length = 2; + break; + case rLength8: + result.length = 3; + break; + } + return result; +} + +void ArchHandler::appendReloc(normalized::Relocations &relocs, uint32_t offset, + uint32_t symbol, uint32_t value, + RelocPattern pattern) { + normalized::Relocation reloc = relocFromPattern(pattern); + reloc.offset = offset; + reloc.symbol = symbol; + reloc.value = value; + relocs.push_back(reloc); +} + + +int16_t ArchHandler::readS16(const uint8_t *addr, bool isBig) { + return read16(addr, isBig); +} + +int32_t ArchHandler::readS32(const uint8_t *addr, bool isBig) { + return read32(addr, isBig); +} + +uint32_t ArchHandler::readU32(const uint8_t *addr, bool isBig) { + return read32(addr, 
isBig); +} + + int64_t ArchHandler::readS64(const uint8_t *addr, bool isBig) { + return read64(addr, isBig); +} + +bool ArchHandler::isDwarfCIE(bool isBig, const DefinedAtom *atom) { + assert(atom->contentType() == DefinedAtom::typeCFI); + if (atom->rawContent().size() < sizeof(uint32_t)) + return false; + uint32_t size = read32(atom->rawContent().data(), isBig); + + uint32_t idOffset = sizeof(uint32_t); + if (size == 0xffffffffU) + idOffset += sizeof(uint64_t); + + return read32(atom->rawContent().data() + idOffset, isBig) == 0; +} + +const Atom *ArchHandler::fdeTargetFunction(const DefinedAtom *fde) { + for (auto ref : *fde) { + if (ref->kindNamespace() == Reference::KindNamespace::mach_o && + ref->kindValue() == unwindRefToFunctionKind()) { + assert(ref->kindArch() == kindArch() && "unexpected Reference arch"); + return ref->target(); + } + } + + return nullptr; +} + +} // namespace mach_o +} // namespace lld + + + diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler.h b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler.h new file mode 100644 index 00000000000..70a63bd1004 --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler.h @@ -0,0 +1,319 @@ +//===- lib/FileFormat/MachO/ArchHandler.h ---------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_ARCH_HANDLER_H +#define LLD_READER_WRITER_MACHO_ARCH_HANDLER_H + +#include "Atoms.h" +#include "File.h" +#include "MachONormalizedFile.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/Error.h" +#include "lld/Core/Reference.h" +#include "lld/Core/Simple.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/ADT/Triple.h" + +namespace lld { +namespace mach_o { + +/// +/// The ArchHandler class handles all architecture specific aspects of +/// mach-o linking. +/// +class ArchHandler { +public: + virtual ~ArchHandler(); + + /// There is no public interface to subclasses of ArchHandler, so this + /// is the only way to instantiate an ArchHandler. + static std::unique_ptr<ArchHandler> create(MachOLinkingContext::Arch arch); + + /// Get (arch specific) kind strings used by Registry. + virtual const Registry::KindStrings *kindStrings() = 0; + + /// Convert mach-o Arch to Reference::KindArch. + virtual Reference::KindArch kindArch() = 0; + + /// Used by StubPass to update References to shared library functions + /// to be references to a stub. + virtual bool isCallSite(const Reference &) = 0; + + /// Used by GOTPass to locate GOT References + virtual bool isGOTAccess(const Reference &, bool &canBypassGOT) { + return false; + } + + /// Used by TLVPass to locate TLV References. + virtual bool isTLVAccess(const Reference &) const { return false; } + + /// Used by the TLVPass to update TLV References. + virtual void updateReferenceToTLV(const Reference *) {} + + /// Used by ShimPass to insert shims in branches that switch mode. + virtual bool isNonCallBranch(const Reference &) = 0; + + /// Used by GOTPass to update GOT References + virtual void updateReferenceToGOT(const Reference *, bool targetIsNowGOT) {} + + /// Does this architecture make use of __unwind_info sections for exception + /// handling? 
If so, it will need a separate pass to create them. + virtual bool needsCompactUnwind() = 0; + + /// Returns the kind of reference to use to synthesize a 32-bit image-offset + /// value, used in the __unwind_info section. + virtual Reference::KindValue imageOffsetKind() = 0; + + /// Returns the kind of reference to use to synthesize a 32-bit image-offset + /// indirect value. Used for personality functions in the __unwind_info + /// section. + virtual Reference::KindValue imageOffsetKindIndirect() = 0; + + /// Architecture specific compact unwind type that signals __eh_frame should + /// actually be used. + virtual uint32_t dwarfCompactUnwindType() = 0; + + /// Reference from an __eh_frame CIE atom to its personality function it's + /// describing. Usually pointer-sized and PC-relative, but differs in whether + /// it needs to be in relocatable objects. + virtual Reference::KindValue unwindRefToPersonalityFunctionKind() = 0; + + /// Reference from an __eh_frame FDE to the CIE it's based on. + virtual Reference::KindValue unwindRefToCIEKind() = 0; + + /// Reference from an __eh_frame FDE atom to the function it's + /// describing. Usually pointer-sized and PC-relative, but differs in whether + /// it needs to be in relocatable objects. + virtual Reference::KindValue unwindRefToFunctionKind() = 0; + + /// Reference from an __unwind_info entry of dwarfCompactUnwindType to the + /// required __eh_frame entry. On current architectures, the low 24 bits + /// represent the offset of the function's FDE entry from the start of + /// __eh_frame. + virtual Reference::KindValue unwindRefToEhFrameKind() = 0; + + /// Returns a pointer sized reference kind. On 64-bit targets this will + /// likely be something like pointer64, and pointer32 on 32-bit targets. 
+ virtual Reference::KindValue pointerKind() = 0; + + virtual const Atom *fdeTargetFunction(const DefinedAtom *fde); + + /// Used by normalizedFromAtoms() to know where to generated rebasing and + /// binding info in final executables. + virtual bool isPointer(const Reference &) = 0; + + /// Used by normalizedFromAtoms() to know where to generated lazy binding + /// info in final executables. + virtual bool isLazyPointer(const Reference &); + + /// Returns true if the specified relocation is paired to the next relocation. + virtual bool isPairedReloc(const normalized::Relocation &) = 0; + + /// Prototype for a helper function. Given a sectionIndex and address, + /// finds the atom and offset with that atom of that address. + typedef std::function<llvm::Error (uint32_t sectionIndex, uint64_t addr, + const lld::Atom **, Reference::Addend *)> + FindAtomBySectionAndAddress; + + /// Prototype for a helper function. Given a symbolIndex, finds the atom + /// representing that symbol. + typedef std::function<llvm::Error (uint32_t symbolIndex, + const lld::Atom **)> FindAtomBySymbolIndex; + + /// Analyzes a relocation from a .o file and returns the info + /// (kind, target, addend) needed to instantiate a Reference. + /// Two helper functions are passed as parameters to find the target atom + /// given a symbol index or address. + virtual llvm::Error + getReferenceInfo(const normalized::Relocation &reloc, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool isBigEndian, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) = 0; + + /// Analyzes a pair of relocations from a .o file and returns the info + /// (kind, target, addend) needed to instantiate a Reference. + /// Two helper functions are passed as parameters to find the target atom + /// given a symbol index or address. 
+ virtual llvm::Error + getPairReferenceInfo(const normalized::Relocation &reloc1, + const normalized::Relocation &reloc2, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool isBig, bool scatterable, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) = 0; + + /// Prototype for a helper function. Given an atom, finds the symbol table + /// index for it in the output file. + typedef std::function<uint32_t (const Atom &atom)> FindSymbolIndexForAtom; + + /// Prototype for a helper function. Given an atom, finds the index + /// of the section that will contain the atom. + typedef std::function<uint32_t (const Atom &atom)> FindSectionIndexForAtom; + + /// Prototype for a helper function. Given an atom, finds the address + /// assigned to it in the output file. + typedef std::function<uint64_t (const Atom &atom)> FindAddressForAtom; + + /// Some architectures require local symbols on anonymous atoms. + virtual bool needsLocalSymbolInRelocatableFile(const DefinedAtom *atom) { + return false; + } + + /// Copy raw content then apply all fixup References on an Atom. + virtual void generateAtomContent(const DefinedAtom &atom, bool relocatable, + FindAddressForAtom findAddress, + FindAddressForAtom findSectionAddress, + uint64_t imageBaseAddress, + llvm::MutableArrayRef<uint8_t> atomContentBuffer) = 0; + + /// Used in -r mode to convert a Reference to a mach-o relocation. + virtual void appendSectionRelocations(const DefinedAtom &atom, + uint64_t atomSectionOffset, + const Reference &ref, + FindSymbolIndexForAtom, + FindSectionIndexForAtom, + FindAddressForAtom, + normalized::Relocations&) = 0; + + /// Add arch-specific References. + virtual void addAdditionalReferences(MachODefinedAtom &atom) { } + + // Add Reference for data-in-code marker. 
+ virtual void addDataInCodeReference(MachODefinedAtom &atom, uint32_t atomOff, + uint16_t length, uint16_t kind) { } + + /// Returns true if the specificed Reference value marks the start or end + /// of a data-in-code range in an atom. + virtual bool isDataInCodeTransition(Reference::KindValue refKind) { + return false; + } + + /// Returns the Reference value for a Reference that marks that start of + /// a data-in-code range. + virtual Reference::KindValue dataInCodeTransitionStart( + const MachODefinedAtom &atom) { + return 0; + } + + /// Returns the Reference value for a Reference that marks that end of + /// a data-in-code range. + virtual Reference::KindValue dataInCodeTransitionEnd( + const MachODefinedAtom &atom) { + return 0; + } + + /// Only relevant for 32-bit arm archs. + virtual bool isThumbFunction(const DefinedAtom &atom) { return false; } + + /// Only relevant for 32-bit arm archs. + virtual const DefinedAtom *createShim(MachOFile &file, bool thumbToArm, + const DefinedAtom &) { + llvm_unreachable("shims only support on arm"); + } + + /// Does a given unwind-cfi atom represent a CIE (as opposed to an FDE). + static bool isDwarfCIE(bool isBig, const DefinedAtom *atom); + + struct ReferenceInfo { + Reference::KindArch arch; + uint16_t kind; + uint32_t offset; + int32_t addend; + }; + + struct OptionalRefInfo { + bool used; + uint16_t kind; + uint32_t offset; + int32_t addend; + }; + + /// Table of architecture specific information for creating stubs. 
+ struct StubInfo { + const char* binderSymbolName; + ReferenceInfo lazyPointerReferenceToHelper; + ReferenceInfo lazyPointerReferenceToFinal; + ReferenceInfo nonLazyPointerReferenceToBinder; + uint8_t codeAlignment; + + uint32_t stubSize; + uint8_t stubBytes[16]; + ReferenceInfo stubReferenceToLP; + OptionalRefInfo optStubReferenceToLP; + + uint32_t stubHelperSize; + uint8_t stubHelperBytes[16]; + ReferenceInfo stubHelperReferenceToImm; + ReferenceInfo stubHelperReferenceToHelperCommon; + + DefinedAtom::ContentType stubHelperImageCacheContentType; + + uint32_t stubHelperCommonSize; + uint8_t stubHelperCommonAlignment; + uint8_t stubHelperCommonBytes[36]; + ReferenceInfo stubHelperCommonReferenceToCache; + OptionalRefInfo optStubHelperCommonReferenceToCache; + ReferenceInfo stubHelperCommonReferenceToBinder; + OptionalRefInfo optStubHelperCommonReferenceToBinder; + }; + + virtual const StubInfo &stubInfo() = 0; + +protected: + ArchHandler(); + + static std::unique_ptr<mach_o::ArchHandler> create_x86_64(); + static std::unique_ptr<mach_o::ArchHandler> create_x86(); + static std::unique_ptr<mach_o::ArchHandler> create_arm(); + static std::unique_ptr<mach_o::ArchHandler> create_arm64(); + + // Handy way to pack mach-o r_type and other bit fields into one 16-bit value. + typedef uint16_t RelocPattern; + enum { + rScattered = 0x8000, + rPcRel = 0x4000, + rExtern = 0x2000, + rLength1 = 0x0000, + rLength2 = 0x0100, + rLength4 = 0x0200, + rLength8 = 0x0300, + rLenArmLo = rLength1, + rLenArmHi = rLength2, + rLenThmbLo = rLength4, + rLenThmbHi = rLength8 + }; + /// Extract RelocPattern from normalized mach-o relocation. + static RelocPattern relocPattern(const normalized::Relocation &reloc); + /// Create normalized Relocation initialized from pattern. + static normalized::Relocation relocFromPattern(RelocPattern pattern); + /// One liner to add a relocation. 
+ static void appendReloc(normalized::Relocations &relocs, uint32_t offset, + uint32_t symbol, uint32_t value, + RelocPattern pattern); + + + static int16_t readS16(const uint8_t *addr, bool isBig); + static int32_t readS32(const uint8_t *addr, bool isBig); + static uint32_t readU32(const uint8_t *addr, bool isBig); + static int64_t readS64(const uint8_t *addr, bool isBig); +}; + +} // namespace mach_o +} // namespace lld + +#endif // LLD_READER_WRITER_MACHO_ARCH_HANDLER_H diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_arm.cpp b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_arm.cpp new file mode 100644 index 00000000000..3286fe06453 --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_arm.cpp @@ -0,0 +1,1519 @@ +//===- lib/FileFormat/MachO/ArchHandler_arm.cpp ---------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "Atoms.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; + +namespace lld { +namespace mach_o { + +using llvm::support::ulittle32_t; +using llvm::support::little32_t; + + +class ArchHandler_arm : public ArchHandler { +public: + ArchHandler_arm() = default; + ~ArchHandler_arm() override = default; + + const Registry::KindStrings *kindStrings() override { return _sKindStrings; } + + Reference::KindArch kindArch() override { return Reference::KindArch::ARM; } + + const ArchHandler::StubInfo &stubInfo() override; + bool isCallSite(const Reference &) override; + bool isPointer(const Reference &) override; + bool 
isPairedReloc(const normalized::Relocation &) override; + bool isNonCallBranch(const Reference &) override; + + bool needsCompactUnwind() override { + return false; + } + Reference::KindValue imageOffsetKind() override { + return invalid; + } + Reference::KindValue imageOffsetKindIndirect() override { + return invalid; + } + + Reference::KindValue unwindRefToPersonalityFunctionKind() override { + return invalid; + } + + Reference::KindValue unwindRefToCIEKind() override { + return invalid; + } + + Reference::KindValue unwindRefToFunctionKind() override { + return invalid; + } + + Reference::KindValue unwindRefToEhFrameKind() override { + return invalid; + } + + Reference::KindValue pointerKind() override { + return invalid; + } + + uint32_t dwarfCompactUnwindType() override { + // FIXME + return -1; + } + + llvm::Error getReferenceInfo(const normalized::Relocation &reloc, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool swap, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) override; + llvm::Error + getPairReferenceInfo(const normalized::Relocation &reloc1, + const normalized::Relocation &reloc2, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool swap, bool scatterable, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) override; + + void generateAtomContent(const DefinedAtom &atom, bool relocatable, + FindAddressForAtom findAddress, + FindAddressForAtom findSectionAddress, + uint64_t imageBaseAddress, + llvm::MutableArrayRef<uint8_t> atomContentBuffer) override; + + void appendSectionRelocations(const DefinedAtom &atom, + uint64_t atomSectionOffset, + const Reference &ref, + FindSymbolIndexForAtom, + FindSectionIndexForAtom, + 
FindAddressForAtom, + normalized::Relocations &) override; + + void addAdditionalReferences(MachODefinedAtom &atom) override; + + bool isDataInCodeTransition(Reference::KindValue refKind) override { + switch (refKind) { + case modeThumbCode: + case modeArmCode: + case modeData: + return true; + default: + return false; + break; + } + } + + Reference::KindValue dataInCodeTransitionStart( + const MachODefinedAtom &atom) override { + return modeData; + } + + Reference::KindValue dataInCodeTransitionEnd( + const MachODefinedAtom &atom) override { + return atom.isThumb() ? modeThumbCode : modeArmCode; + } + + bool isThumbFunction(const DefinedAtom &atom) override; + const DefinedAtom *createShim(MachOFile &file, bool thumbToArm, + const DefinedAtom &) override; + +private: + friend class Thumb2ToArmShimAtom; + friend class ArmToThumbShimAtom; + + static const Registry::KindStrings _sKindStrings[]; + static const StubInfo _sStubInfoArmPIC; + + enum ArmKind : Reference::KindValue { + invalid, /// for error condition + + modeThumbCode, /// Content starting at this offset is thumb. + modeArmCode, /// Content starting at this offset is arm. + modeData, /// Content starting at this offset is data. + + // Kinds found in mach-o .o files: + thumb_bl22, /// ex: bl _foo + thumb_b22, /// ex: b _foo + thumb_movw, /// ex: movw r1, :lower16:_foo + thumb_movt, /// ex: movt r1, :lower16:_foo + thumb_movw_funcRel, /// ex: movw r1, :lower16:(_foo-(L1+4)) + thumb_movt_funcRel, /// ex: movt r1, :upper16:(_foo-(L1+4)) + arm_bl24, /// ex: bl _foo + arm_b24, /// ex: b _foo + arm_movw, /// ex: movw r1, :lower16:_foo + arm_movt, /// ex: movt r1, :lower16:_foo + arm_movw_funcRel, /// ex: movw r1, :lower16:(_foo-(L1+4)) + arm_movt_funcRel, /// ex: movt r1, :upper16:(_foo-(L1+4)) + pointer32, /// ex: .long _foo + delta32, /// ex: .long _foo - . + + // Kinds introduced by Passes: + lazyPointer, /// Location contains a lazy pointer. 
+ lazyImmediateLocation, /// Location contains immediate value used in stub. + }; + + // Utility functions for inspecting/updating instructions. + static bool isThumbMovw(uint32_t instruction); + static bool isThumbMovt(uint32_t instruction); + static bool isArmMovw(uint32_t instruction); + static bool isArmMovt(uint32_t instruction); + static int32_t getDisplacementFromThumbBranch(uint32_t instruction, uint32_t); + static int32_t getDisplacementFromArmBranch(uint32_t instruction); + static uint16_t getWordFromThumbMov(uint32_t instruction); + static uint16_t getWordFromArmMov(uint32_t instruction); + static uint32_t clearThumbBit(uint32_t value, const Atom *target); + static uint32_t setDisplacementInArmBranch(uint32_t instr, int32_t disp, + bool targetIsThumb); + static uint32_t setDisplacementInThumbBranch(uint32_t instr, uint32_t ia, + int32_t disp, bool targetThumb); + static uint32_t setWordFromThumbMov(uint32_t instruction, uint16_t word); + static uint32_t setWordFromArmMov(uint32_t instruction, uint16_t word); + + StringRef stubName(const DefinedAtom &); + bool useExternalRelocationTo(const Atom &target); + + void applyFixupFinal(const Reference &ref, uint8_t *location, + uint64_t fixupAddress, uint64_t targetAddress, + uint64_t inAtomAddress, bool &thumbMode, + bool targetIsThumb); + + void applyFixupRelocatable(const Reference &ref, uint8_t *location, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress, bool &thumbMode, + bool targetIsThumb); +}; + +//===----------------------------------------------------------------------===// +// ArchHandler_arm +//===----------------------------------------------------------------------===// + +const Registry::KindStrings ArchHandler_arm::_sKindStrings[] = { + LLD_KIND_STRING_ENTRY(invalid), + LLD_KIND_STRING_ENTRY(modeThumbCode), + LLD_KIND_STRING_ENTRY(modeArmCode), + LLD_KIND_STRING_ENTRY(modeData), + LLD_KIND_STRING_ENTRY(thumb_bl22), + LLD_KIND_STRING_ENTRY(thumb_b22), + 
LLD_KIND_STRING_ENTRY(thumb_movw), + LLD_KIND_STRING_ENTRY(thumb_movt), + LLD_KIND_STRING_ENTRY(thumb_movw_funcRel), + LLD_KIND_STRING_ENTRY(thumb_movt_funcRel), + LLD_KIND_STRING_ENTRY(arm_bl24), + LLD_KIND_STRING_ENTRY(arm_b24), + LLD_KIND_STRING_ENTRY(arm_movw), + LLD_KIND_STRING_ENTRY(arm_movt), + LLD_KIND_STRING_ENTRY(arm_movw_funcRel), + LLD_KIND_STRING_ENTRY(arm_movt_funcRel), + LLD_KIND_STRING_ENTRY(pointer32), + LLD_KIND_STRING_ENTRY(delta32), + LLD_KIND_STRING_ENTRY(lazyPointer), + LLD_KIND_STRING_ENTRY(lazyImmediateLocation), + LLD_KIND_STRING_END +}; + +const ArchHandler::StubInfo ArchHandler_arm::_sStubInfoArmPIC = { + "dyld_stub_binder", + + // References in lazy pointer + { Reference::KindArch::ARM, pointer32, 0, 0 }, + { Reference::KindArch::ARM, lazyPointer, 0, 0 }, + + // GOT pointer to dyld_stub_binder + { Reference::KindArch::ARM, pointer32, 0, 0 }, + + // arm code alignment 2^2 + 2, + + // Stub size and code + 16, + { 0x04, 0xC0, 0x9F, 0xE5, // ldr ip, pc + 12 + 0x0C, 0xC0, 0x8F, 0xE0, // add ip, pc, ip + 0x00, 0xF0, 0x9C, 0xE5, // ldr pc, [ip] + 0x00, 0x00, 0x00, 0x00 }, // .long L_foo$lazy_ptr - (L1$scv + 8) + { Reference::KindArch::ARM, delta32, 12, 0 }, + { false, 0, 0, 0 }, + + // Stub Helper size and code + 12, + { 0x00, 0xC0, 0x9F, 0xE5, // ldr ip, [pc, #0] + 0x00, 0x00, 0x00, 0xEA, // b _helperhelper + 0x00, 0x00, 0x00, 0x00 }, // .long lazy-info-offset + { Reference::KindArch::ARM, lazyImmediateLocation, 8, 0 }, + { Reference::KindArch::ARM, arm_b24, 4, 0 }, + + // Stub helper image cache content type + DefinedAtom::typeGOT, + + // Stub Helper-Common size and code + 36, + // Stub helper alignment + 2, + { // push lazy-info-offset + 0x04, 0xC0, 0x2D, 0xE5, // str ip, [sp, #-4]! + // push address of dyld_mageLoaderCache + 0x10, 0xC0, 0x9F, 0xE5, // ldr ip, L1 + 0x0C, 0xC0, 0x8F, 0xE0, // add ip, pc, ip + 0x04, 0xC0, 0x2D, 0xE5, // str ip, [sp, #-4]! 
+ // jump through dyld_stub_binder + 0x08, 0xC0, 0x9F, 0xE5, // ldr ip, L2 + 0x0C, 0xC0, 0x8F, 0xE0, // add ip, pc, ip + 0x00, 0xF0, 0x9C, 0xE5, // ldr pc, [ip] + 0x00, 0x00, 0x00, 0x00, // L1: .long fFastStubGOTAtom - (helper+16) + 0x00, 0x00, 0x00, 0x00 }, // L2: .long dyld_stub_binder - (helper+28) + { Reference::KindArch::ARM, delta32, 28, 0xC }, + { false, 0, 0, 0 }, + { Reference::KindArch::ARM, delta32, 32, 0x04 }, + { false, 0, 0, 0 } +}; + +const ArchHandler::StubInfo &ArchHandler_arm::stubInfo() { + // If multiple kinds of stubs are supported, select which StubInfo here. + return _sStubInfoArmPIC; +} + +bool ArchHandler_arm::isCallSite(const Reference &ref) { + switch (ref.kindValue()) { + case thumb_b22: + case thumb_bl22: + case arm_b24: + case arm_bl24: + return true; + default: + return false; + } +} + +bool ArchHandler_arm::isPointer(const Reference &ref) { + return (ref.kindValue() == pointer32); +} + +bool ArchHandler_arm::isNonCallBranch(const Reference &ref) { + switch (ref.kindValue()) { + case thumb_b22: + case arm_b24: + return true; + default: + return false; + } +} + +bool ArchHandler_arm::isPairedReloc(const Relocation &reloc) { + switch (reloc.type) { + case ARM_RELOC_SECTDIFF: + case ARM_RELOC_LOCAL_SECTDIFF: + case ARM_RELOC_HALF_SECTDIFF: + case ARM_RELOC_HALF: + return true; + default: + return false; + } +} + +/// Trace references from stub atom to lazy pointer to target and get its name. +StringRef ArchHandler_arm::stubName(const DefinedAtom &stubAtom) { + assert(stubAtom.contentType() == DefinedAtom::typeStub); + for (const Reference *ref : stubAtom) { + if (const DefinedAtom* lp = dyn_cast<DefinedAtom>(ref->target())) { + if (lp->contentType() != DefinedAtom::typeLazyPointer) + continue; + for (const Reference *ref2 : *lp) { + if (ref2->kindValue() != lazyPointer) + continue; + return ref2->target()->name(); + } + } + } + return "stub"; +} + +/// Extract displacement from an ARM b/bl/blx instruction. 
/// The result is the imm24 field shifted left by two and sign-extended from
/// bit 25, plus 2 when the top byte is 0xFB (BLX with the H bit set).
int32_t ArchHandler_arm::getDisplacementFromArmBranch(uint32_t instruction) {
  // Sign-extend imm24
  int32_t displacement = (instruction & 0x00FFFFFF) << 2;
  if ((displacement & 0x02000000) != 0)
    displacement |= 0xFC000000;
  // If this is BLX and H bit set, add 2.
  if ((instruction & 0xFF000000) == 0xFB000000)
    displacement += 2;
  return displacement;
}

/// Update an ARM b/bl/blx instruction, switching bl <-> blx as needed.
///
/// \param instruction   the existing 32-bit ARM branch instruction.
/// \param displacement  byte displacement to encode; asserted to be in range.
/// \param targetIsThumb when true the result is always a BLX (and the input
///                      is asserted to be an unconditional BL or a BLX);
///                      when false an incoming BLX is turned back into BL.
/// \returns the re-encoded instruction.
uint32_t ArchHandler_arm::setDisplacementInArmBranch(uint32_t instruction,
                                                     int32_t displacement,
                                                     bool targetIsThumb) {
  assert((displacement <= 33554428) && (displacement > (-33554432))
                                              && "arm branch out of range");
  bool is_blx = ((instruction & 0xF0000000) == 0xF0000000);
  // Start from the original opcode/condition byte; it is overridden below
  // only when the instruction form has to change.
  uint32_t newInstruction = (instruction & 0xFF000000);
  uint32_t h = 0;
  if (targetIsThumb) {
    // Force use of BLX.
    newInstruction = 0xFA000000;
    if (!is_blx) {
      assert(((instruction & 0xF0000000) == 0xE0000000)
                                                   && "no conditional arm blx");
      assert(((instruction & 0xFF000000) == 0xEB000000)
                                             && "no arm pc-rel BX instruction");
    }
    // Bit 1 of the displacement does not fit in imm24 (which is scaled by 4);
    // it is carried in the H bit (bit 24) instead.
    if (displacement & 2)
      h = 1;
  }
  else {
    // Force use of B/BL.
    if (is_blx)
      newInstruction = 0xEB000000;
  }
  newInstruction |= (h << 24) | ((displacement >> 2) & 0x00FFFFFF);
  return newInstruction;
}

/// Extract displacement from a thumb b/bl/blx instruction.
///
/// \param instruction the two thumb halfwords as read by a single
///                    little-endian 32-bit load (first halfword in the low
///                    16 bits).
/// \param instrAddr   address of the instruction; only bit 1 is examined, to
///                    undo the pc alignment a blx performs.
/// \returns the signed byte displacement encoded in the instruction.
int32_t ArchHandler_arm::getDisplacementFromThumbBranch(uint32_t instruction,
                                                        uint32_t instrAddr) {
  bool is_blx = ((instruction & 0xD000F800) == 0xC000F000);
  uint32_t s = (instruction >> 10) & 0x1;
  uint32_t j1 = (instruction >> 29) & 0x1;
  uint32_t j2 = (instruction >> 27) & 0x1;
  uint32_t imm10 = instruction & 0x3FF;
  uint32_t imm11 = (instruction >> 16) & 0x7FF;
  // i1/i2 are 1 exactly when the corresponding j bit equals the sign bit.
  uint32_t i1 = (j1 == s);
  uint32_t i2 = (j2 == s);
  uint32_t dis =
      (s << 24) | (i1 << 23) | (i2 << 22) | (imm10 << 12) | (imm11 << 1);
  int32_t sdis = dis;
  // Sign-extend from bit 24.
  int32_t result = s ? (sdis | 0xFE000000) : sdis;
  if (is_blx && (instrAddr & 0x2)) {
    // The thumb blx instruction always has low bit of imm11 as zero.  The way
    // a 2-byte aligned blx can branch to a 4-byte aligned ARM target is that
    // the blx instruction always 4-byte aligns the pc before adding the
    // displacement from the blx.  We must emulate that when decoding this.
    result -= 2;
  }
  return result;
}

/// Update a thumb b/bl/blx instruction, switching bl <-> blx as needed.
///
/// \param instruction   the existing thumb branch (same halfword layout as
///                      in getDisplacementFromThumbBranch()).
/// \param instrAddr     address of the instruction; bit 1 is used to
///                      compensate for the pc alignment a blx performs.
/// \param displacement  byte displacement to encode; asserted to be in range.
/// \param targetIsThumb selects bl (thumb target) or blx (ARM target) for
///                      bl/blx inputs; a plain b asserts the target is thumb.
/// \returns the re-encoded instruction.
uint32_t ArchHandler_arm::setDisplacementInThumbBranch(uint32_t instruction,
                                                       uint32_t instrAddr,
                                                       int32_t displacement,
                                                       bool targetIsThumb) {
  assert((displacement <= 16777214) && (displacement > (-16777216))
                                              && "thumb branch out of range");
  bool is_bl = ((instruction & 0xD000F800) == 0xD000F000);
  bool is_blx = ((instruction & 0xD000F800) == 0xC000F000);
  bool is_b = ((instruction & 0xD000F800) == 0x9000F000);
  uint32_t newInstruction = (instruction & 0xD000F800);
  if (is_bl || is_blx) {
    if (targetIsThumb) {
      newInstruction = 0xD000F000; // Use bl
    } else {
      newInstruction = 0xC000F000; // Use blx
      // See note in getDisplacementFromThumbBranch() about blx.
      if (instrAddr & 0x2)
        displacement += 2;
    }
  } else if (is_b) {
    assert(targetIsThumb && "no pc-rel thumb branch instruction that "
                             "switches to arm mode");
  }
  else {
    llvm_unreachable("thumb branch22 reloc on a non-branch instruction");
  }
  // Split the displacement into the instruction's fields; this is the inverse
  // of the decoding in getDisplacementFromThumbBranch().
  uint32_t s = (uint32_t)(displacement >> 24) & 0x1;
  uint32_t i1 = (uint32_t)(displacement >> 23) & 0x1;
  uint32_t i2 = (uint32_t)(displacement >> 22) & 0x1;
  uint32_t imm10 = (uint32_t)(displacement >> 12) & 0x3FF;
  uint32_t imm11 = (uint32_t)(displacement >> 1) & 0x7FF;
  uint32_t j1 = (i1 == s);
  uint32_t j2 = (i2 == s);
  uint32_t nextDisp = (j1 << 13) | (j2 << 11) | imm11;
  uint32_t firstDisp = (s << 10) | imm10;
  newInstruction |= (nextDisp << 16) | firstDisp;
  return newInstruction;
}

/// True when the opcode bits selected by the mask match a thumb movw.
bool ArchHandler_arm::isThumbMovw(uint32_t instruction) {
  return (instruction & 0x8000FBF0) == 0x0000F240;
}

/// True when the opcode bits selected by the mask match a thumb movt.
bool ArchHandler_arm::isThumbMovt(uint32_t instruction) {
  return (instruction & 0x8000FBF0) == 0x0000F2C0;
}

/// True when the opcode bits selected by the mask match an ARM movw.
/// The condition field (top four bits) is not examined.
bool ArchHandler_arm::isArmMovw(uint32_t instruction) {
  return (instruction & 0x0FF00000) == 0x03000000;
}

/// True when the opcode bits selected by the mask match an ARM movt.
/// The condition field (top four bits) is not examined.
bool ArchHandler_arm::isArmMovt(uint32_t instruction) {
  return (instruction & 0x0FF00000) == 0x03400000;
}

/// Reassemble the 16-bit immediate of a thumb movw/movt from its four
/// scattered fields (imm4:i:imm3:imm8).
uint16_t ArchHandler_arm::getWordFromThumbMov(uint32_t instruction) {
  assert(isThumbMovw(instruction) || isThumbMovt(instruction));
  uint32_t i = ((instruction & 0x00000400) >> 10);
  uint32_t imm4 = (instruction & 0x0000000F);
  uint32_t imm3 = ((instruction & 0x70000000) >> 28);
  uint32_t imm8 = ((instruction & 0x00FF0000) >> 16);
  return (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8;
}

/// Reassemble the 16-bit immediate of an ARM movw/movt (imm4:imm12).
uint16_t ArchHandler_arm::getWordFromArmMov(uint32_t instruction) {
  assert(isArmMovw(instruction) || isArmMovt(instruction));
  uint32_t imm4 = ((instruction & 0x000F0000) >> 16);
  uint32_t imm12 = (instruction & 0x00000FFF);
  return (imm4 << 12) | imm12;
}

/// Return \p instr with its 16-bit immediate replaced by \p word.
/// Inverse of getWordFromThumbMov(); all non-immediate bits are preserved.
uint32_t ArchHandler_arm::setWordFromThumbMov(uint32_t instr, uint16_t word) {
  assert(isThumbMovw(instr) || isThumbMovt(instr));
  uint32_t imm4 = (word & 0xF000) >> 12;
  uint32_t i =    (word & 0x0800) >> 11;
  uint32_t imm3 = (word & 0x0700) >> 8;
  uint32_t imm8 =  word & 0x00FF;
  return (instr & 0x8F00FBF0) | imm4 | (i << 10) | (imm3 << 28) | (imm8 << 16);
}

/// Return \p instr with its 16-bit immediate replaced by \p word.
/// Inverse of getWordFromArmMov(); all non-immediate bits are preserved.
uint32_t ArchHandler_arm::setWordFromArmMov(uint32_t instr, uint16_t word) {
  assert(isArmMovw(instr) || isArmMovt(instr));
  uint32_t imm4 = (word & 0xF000) >> 12;
  uint32_t imm12 = word & 0x0FFF;
  return (instr & 0xFFF0F000) | (imm4 << 16) | imm12;
}

/// Return \p value with its low bit cleared when that bit is set and
/// \p target is a defined atom marked as thumb; otherwise return it unchanged.
uint32_t ArchHandler_arm::clearThumbBit(uint32_t value, const Atom *target) {
  // The assembler often adds one to the address of a thumb function.
  // We need to undo that so it does not look like an addend.
  if (value & 1) {
    if (isa<DefinedAtom>(target)) {
      // NOTE(review): this unchecked cast presumes every DefinedAtom that
      // reaches here is actually a MachODefinedAtom - TODO confirm.
      const MachODefinedAtom *machoTarget =
          reinterpret_cast<const MachODefinedAtom *>(target);
      if (machoTarget->isThumb())
        value &= -2; // mask off thumb-bit
    }
  }
  return value;
}

/// Decode a single (unpaired) relocation into a reference.
///
/// Reads the 32-bit little-endian word at \p offsetInAtom inside \p inAtom
/// and, based on the relocation pattern, fills in \p kind, \p target and
/// \p addend.
///
/// \param fixupAddress        address of the word being relocated.
/// \param isBig               not used by this implementation.
/// \param atomFromAddress     resolves (section index, address) to an atom
///                            and offset.
/// \param atomFromSymbolIndex resolves a symbol index to an atom.
/// \returns an error from the lookup callbacks, a GenericError for an
///          unrecognised pattern, otherwise success.
llvm::Error ArchHandler_arm::getReferenceInfo(
    const Relocation &reloc, const DefinedAtom *inAtom, uint32_t offsetInAtom,
    uint64_t fixupAddress, bool isBig,
    FindAtomBySectionAndAddress atomFromAddress,
    FindAtomBySymbolIndex atomFromSymbolIndex, Reference::KindValue *kind,
    const lld::Atom **target, Reference::Addend *addend) {
  const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom];
  uint64_t targetAddress;
  uint32_t instruction = *(const ulittle32_t *)fixupContent;
  int32_t displacement;
  switch (relocPattern(reloc)) {
  case ARM_THUMB_RELOC_BR22 | rPcRel | rExtern | rLength4:
    // ex: bl _foo (and _foo is undefined)
    if ((instruction & 0xD000F800) == 0x9000F000)
      *kind = thumb_b22;
    else
      *kind = thumb_bl22;
    if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
      return ec;
    // Instruction contains branch to addend.
    displacement = getDisplacementFromThumbBranch(instruction, fixupAddress);
    *addend = fixupAddress + 4 + displacement;
    return llvm::Error();
  case ARM_THUMB_RELOC_BR22 | rPcRel | rLength4:
    // ex: bl _foo (and _foo is defined)
    if ((instruction & 0xD000F800) == 0x9000F000)
      *kind = thumb_b22;
    else
      *kind = thumb_bl22;
    displacement = getDisplacementFromThumbBranch(instruction, fixupAddress);
    targetAddress = fixupAddress + 4 + displacement;
    return atomFromAddress(reloc.symbol, targetAddress, target, addend);
  case ARM_THUMB_RELOC_BR22 | rScattered | rPcRel | rLength4:
    // ex: bl _foo+4 (and _foo is defined)
    if ((instruction & 0xD000F800) == 0x9000F000)
      *kind = thumb_b22;
    else
      *kind = thumb_bl22;
    displacement = getDisplacementFromThumbBranch(instruction, fixupAddress);
    targetAddress = fixupAddress + 4 + displacement;
    if (auto ec = atomFromAddress(0, reloc.value, target, addend))
      return ec;
    // reloc.value is target atom's address.  Instruction contains branch
    // to atom+addend.
    *addend += (targetAddress - reloc.value);
    return llvm::Error();
  case ARM_RELOC_BR24 | rPcRel | rExtern | rLength4:
    // ex: bl _foo (and _foo is undefined)
    if (((instruction & 0x0F000000) == 0x0A000000)
        && ((instruction & 0xF0000000) != 0xF0000000))
      *kind = arm_b24;
    else
      *kind = arm_bl24;
    if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
      return ec;
    // Instruction contains branch to addend.
    displacement = getDisplacementFromArmBranch(instruction);
    *addend = fixupAddress + 8 + displacement;
    return llvm::Error();
  case ARM_RELOC_BR24 | rPcRel | rLength4:
    // ex: bl _foo (and _foo is defined)
    if (((instruction & 0x0F000000) == 0x0A000000)
        && ((instruction & 0xF0000000) != 0xF0000000))
      *kind = arm_b24;
    else
      *kind = arm_bl24;
    displacement = getDisplacementFromArmBranch(instruction);
    targetAddress = fixupAddress + 8 + displacement;
    return atomFromAddress(reloc.symbol, targetAddress, target, addend);
  case ARM_RELOC_BR24 | rScattered | rPcRel | rLength4:
    // ex: bl _foo+4 (and _foo is defined)
    if (((instruction & 0x0F000000) == 0x0A000000)
        && ((instruction & 0xF0000000) != 0xF0000000))
      *kind = arm_b24;
    else
      *kind = arm_bl24;
    displacement = getDisplacementFromArmBranch(instruction);
    targetAddress = fixupAddress + 8 + displacement;
    if (auto ec = atomFromAddress(0, reloc.value, target, addend))
      return ec;
    // reloc.value is target atom's address.  Instruction contains branch
    // to atom+addend.
    *addend += (targetAddress - reloc.value);
    return llvm::Error();
  case ARM_RELOC_VANILLA | rExtern | rLength4:
    // ex: .long _foo (and _foo is undefined)
    *kind = pointer32;
    if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
      return ec;
    *addend = instruction;
    return llvm::Error();
  case ARM_RELOC_VANILLA | rLength4:
    // ex: .long _foo (and _foo is defined)
    *kind = pointer32;
    if (auto ec = atomFromAddress(reloc.symbol, instruction, target, addend))
      return ec;
    *addend = clearThumbBit((uint32_t) * addend, *target);
    return llvm::Error();
  case ARM_RELOC_VANILLA | rScattered | rLength4:
    // ex: .long _foo+a (and _foo is defined)
    *kind = pointer32;
    if (auto ec = atomFromAddress(0, reloc.value, target, addend))
      return ec;
    *addend += (clearThumbBit(instruction, *target) - reloc.value);
    return llvm::Error();
  default:
    return llvm::make_error<GenericError>("unsupported arm relocation type");
  }
  // Every switch path above returns; this is not reached.
  return llvm::Error();
}

/// Decode a two-entry relocation (\p reloc1 followed by its ARM_RELOC_PAIR
/// \p reloc2) into a reference.
///
/// The switch classifies the pair: either a 32-bit pointer difference
/// (SECTDIFF / LOCAL_SECTDIFF), or a movw/movt half, which is described by
/// three flags: \c funcRel (HALF_SECTDIFF, i.e. relative to a label in this
/// atom), \c top (movt, upper 16 bits) and \c thumbReloc (thumb encoding).
/// For movw/movt the half of the value that is not in the instruction is
/// taken from the low 16 bits of \c reloc2.offset.
///
/// \param isBig       not used by this implementation.
/// \param scatterable when true, a pointer difference whose subtrahend is
///                    not in \p inAtom is rejected.
/// \returns an error from the lookup callbacks, a GenericError for an
///          unsupported pair or an unexpected instruction, otherwise success.
llvm::Error
ArchHandler_arm::getPairReferenceInfo(const normalized::Relocation &reloc1,
                                     const normalized::Relocation &reloc2,
                                     const DefinedAtom *inAtom,
                                     uint32_t offsetInAtom,
                                     uint64_t fixupAddress, bool isBig,
                                     bool scatterable,
                                     FindAtomBySectionAndAddress atomFromAddr,
                                     FindAtomBySymbolIndex atomFromSymbolIndex,
                                     Reference::KindValue *kind,
                                     const lld::Atom **target,
                                     Reference::Addend *addend) {
  bool pointerDiff = false;
  // The next three are assigned only in the movw/movt cases and are read only
  // when pointerDiff is false.
  bool funcRel;
  bool top;
  bool thumbReloc;
  // First relocation's pattern in the high 16 bits, second's in the low 16.
  switch(relocPattern(reloc1) << 16 | relocPattern(reloc2)) {
  case ((ARM_RELOC_HALF_SECTDIFF | rScattered | rLenThmbLo) << 16 |
         ARM_RELOC_PAIR          | rScattered | rLenThmbLo):
    // ex: movw	r1, :lower16:(_x-L1) [thumb mode]
    *kind = thumb_movw_funcRel;
    funcRel = true;
    top = false;
    thumbReloc = true;
    break;
  case ((ARM_RELOC_HALF_SECTDIFF | rScattered | rLenThmbHi) << 16 |
         ARM_RELOC_PAIR          | rScattered | rLenThmbHi):
    // ex: movt	r1, :upper16:(_x-L1) [thumb mode]
    *kind = thumb_movt_funcRel;
    funcRel = true;
    top = true;
    thumbReloc = true;
    break;
  case ((ARM_RELOC_HALF_SECTDIFF | rScattered | rLenArmLo) << 16 |
         ARM_RELOC_PAIR          | rScattered | rLenArmLo):
    // ex: movw	r1, :lower16:(_x-L1) [arm mode]
    *kind = arm_movw_funcRel;
    funcRel = true;
    top = false;
    thumbReloc = false;
    break;
  case ((ARM_RELOC_HALF_SECTDIFF | rScattered | rLenArmHi) << 16 |
         ARM_RELOC_PAIR          | rScattered | rLenArmHi):
    // ex: movt	r1, :upper16:(_x-L1) [arm mode]
    *kind = arm_movt_funcRel;
    funcRel = true;
    top = true;
    thumbReloc = false;
    break;
  case ((ARM_RELOC_HALF     | rLenThmbLo) << 16 |
         ARM_RELOC_PAIR     | rLenThmbLo):
    // ex: movw	r1, :lower16:_x [thumb mode]
    *kind = thumb_movw;
    funcRel = false;
    top = false;
    thumbReloc = true;
    break;
  case ((ARM_RELOC_HALF     | rLenThmbHi) << 16 |
         ARM_RELOC_PAIR     | rLenThmbHi):
    // ex: movt	r1, :upper16:_x [thumb mode]
    *kind = thumb_movt;
    funcRel = false;
    top = true;
    thumbReloc = true;
    break;
  case ((ARM_RELOC_HALF     | rLenArmLo) << 16 |
         ARM_RELOC_PAIR     | rLenArmLo):
    // ex: movw	r1, :lower16:_x [arm mode]
    *kind = arm_movw;
    funcRel = false;
    top = false;
    thumbReloc = false;
    break;
  case ((ARM_RELOC_HALF     | rLenArmHi) << 16 |
         ARM_RELOC_PAIR     | rLenArmHi):
    // ex: movt	r1, :upper16:_x [arm mode]
    *kind = arm_movt;
    funcRel = false;
    top = true;
    thumbReloc = false;
    break;
  case ((ARM_RELOC_HALF | rScattered  | rLenThmbLo) << 16 |
         ARM_RELOC_PAIR               | rLenThmbLo):
    // ex: movw	r1, :lower16:_x+a [thumb mode]
    *kind = thumb_movw;
    funcRel = false;
    top = false;
    thumbReloc = true;
    break;
  case ((ARM_RELOC_HALF | rScattered  | rLenThmbHi) << 16 |
         ARM_RELOC_PAIR               | rLenThmbHi):
    // ex: movt	r1, :upper16:_x+a [thumb mode]
    *kind = thumb_movt;
    funcRel = false;
    top = true;
    thumbReloc = true;
    break;
  case ((ARM_RELOC_HALF | rScattered  | rLenArmLo) << 16 |
         ARM_RELOC_PAIR               | rLenArmLo):
    // ex: movw	r1, :lower16:_x+a [arm mode]
    *kind = arm_movw;
    funcRel = false;
    top = false;
    thumbReloc = false;
    break;
  case ((ARM_RELOC_HALF | rScattered  | rLenArmHi) << 16 |
         ARM_RELOC_PAIR               | rLenArmHi):
    // ex: movt	r1, :upper16:_x+a [arm mode]
    *kind = arm_movt;
    funcRel = false;
    top = true;
    thumbReloc = false;
    break;
  case ((ARM_RELOC_HALF | rExtern   | rLenThmbLo) << 16 |
         ARM_RELOC_PAIR             | rLenThmbLo):
    // ex: movw	r1, :lower16:_undef [thumb mode]
    *kind = thumb_movw;
    funcRel = false;
    top = false;
    thumbReloc = true;
    break;
  case ((ARM_RELOC_HALF | rExtern   | rLenThmbHi) << 16 |
         ARM_RELOC_PAIR             | rLenThmbHi):
    // ex: movt	r1, :upper16:_undef [thumb mode]
    *kind = thumb_movt;
    funcRel = false;
    top = true;
    thumbReloc = true;
    break;
  case ((ARM_RELOC_HALF | rExtern   | rLenArmLo) << 16 |
         ARM_RELOC_PAIR             | rLenArmLo):
    // ex: movw	r1, :lower16:_undef [arm mode]
    *kind = arm_movw;
    funcRel = false;
    top = false;
    thumbReloc = false;
    break;
  case ((ARM_RELOC_HALF | rExtern   | rLenArmHi) << 16 |
         ARM_RELOC_PAIR             | rLenArmHi):
    // ex: movt	r1, :upper16:_undef [arm mode]
    *kind = arm_movt;
    funcRel = false;
    top = true;
    thumbReloc = false;
    break;
  case ((ARM_RELOC_SECTDIFF       | rScattered | rLength4) << 16 |
         ARM_RELOC_PAIR           | rScattered | rLength4):
  case ((ARM_RELOC_LOCAL_SECTDIFF | rScattered | rLength4) << 16 |
         ARM_RELOC_PAIR           | rScattered | rLength4):
    // ex: .long _foo - .
    pointerDiff = true;
    break;
  default:
    return llvm::make_error<GenericError>("unsupported arm relocation pair");
  }
  const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom];
  uint32_t instruction = *(const ulittle32_t *)fixupContent;
  uint32_t value;
  uint32_t fromAddress;
  uint32_t toAddress;
  uint16_t instruction16;
  uint16_t other16;
  const lld::Atom *fromTarget;
  Reference::Addend offsetInTo;
  Reference::Addend offsetInFrom;
  if (pointerDiff) {
    // 32-bit difference: reloc1 names the minuend, reloc2 the subtrahend.
    toAddress = reloc1.value;
    fromAddress = reloc2.value;
    if (auto ec = atomFromAddr(0, toAddress, target, &offsetInTo))
      return ec;
    if (auto ec = atomFromAddr(0, fromAddress, &fromTarget, &offsetInFrom))
      return ec;
    if (scatterable && (fromTarget != inAtom))
      return llvm::make_error<GenericError>(
          "SECTDIFF relocation where subtrahend label is not in atom");
    *kind = delta32;
    value = clearThumbBit(instruction, *target);
    *addend = (int32_t)(value - (toAddress - fixupAddress));
  } else if (funcRel) {
    // movw/movt of (target - label in this atom).
    toAddress = reloc1.value;
    fromAddress = reloc2.value;
    if (auto ec = atomFromAddr(0, toAddress, target, &offsetInTo))
      return ec;
    if (auto ec = atomFromAddr(0, fromAddress, &fromTarget, &offsetInFrom))
      return ec;
    if (fromTarget != inAtom)
      return llvm::make_error<GenericError>("ARM_RELOC_HALF_SECTDIFF relocation"
                                     " where subtrahend label is not in atom");
    other16 = (reloc2.offset & 0xFFFF);
    if (thumbReloc) {
      if (top) {
        if (!isThumbMovt(instruction))
          return llvm::make_error<GenericError>("expected movt instruction");
      }
      else {
        if (!isThumbMovw(instruction))
          return llvm::make_error<GenericError>("expected movw instruction");
      }
      instruction16 = getWordFromThumbMov(instruction);
    }
    else {
      if (top) {
        if (!isArmMovt(instruction))
          return llvm::make_error<GenericError>("expected movt instruction");
      }
      else {
        if (!isArmMovw(instruction))
          return llvm::make_error<GenericError>("expected movw instruction");
      }
      instruction16 = getWordFromArmMov(instruction);
    }
    // Rebuild the full 32-bit value from the half in the instruction and the
    // half carried by the pair relocation.
    if (top)
      value = (instruction16 << 16) | other16;
    else
      value = (other16 << 16) | instruction16;
    value = clearThumbBit(value, *target);
    int64_t ta = (int64_t) value - (toAddress - fromAddress);
    *addend = ta - offsetInFrom;
    return llvm::Error();
  } else {
    // Absolute movw/movt (external, scattered or section-relative).
    uint32_t sectIndex;
    if (thumbReloc) {
      if (top) {
        if (!isThumbMovt(instruction))
          return llvm::make_error<GenericError>("expected movt instruction");
      }
      else {
        if (!isThumbMovw(instruction))
          return llvm::make_error<GenericError>("expected movw instruction");
      }
      instruction16 = getWordFromThumbMov(instruction);
    }
    else {
      if (top) {
        if (!isArmMovt(instruction))
          return llvm::make_error<GenericError>("expected movt instruction");
      }
      else {
        if (!isArmMovw(instruction))
          return llvm::make_error<GenericError>("expected movw instruction");
      }
      instruction16 = getWordFromArmMov(instruction);
    }
    other16 = (reloc2.offset & 0xFFFF);
    if (top)
      value = (instruction16 << 16) | other16;
    else
      value = (other16 << 16) | instruction16;
    if (reloc1.isExtern) {
      if (auto ec = atomFromSymbolIndex(reloc1.symbol, target))
        return ec;
      *addend = value;
    } else {
      if (reloc1.scattered) {
        toAddress = reloc1.value;
        sectIndex = 0;
      } else {
        toAddress = value;
        sectIndex = reloc1.symbol;
      }
      if (auto ec = atomFromAddr(sectIndex, toAddress, target, &offsetInTo))
        return ec;
      *addend = value - toAddress;
    }
  }

  return llvm::Error();
}

/// Write the fully resolved value for one reference into \p loc.
///
/// Used by generateAtomContent() when not producing a relocatable object.
/// References outside the mach_o namespace are ignored.
///
/// \param loc           location of the 32-bit little-endian word to patch.
/// \param fixupAddress  address of \p loc.
/// \param targetAddress address of the reference's target.
/// \param inAtomAddress address of the atom that contains the fixup.
/// \param thumbMode     in/out: updated by modeThumbCode / modeArmCode
///                      references and asserted against by instruction kinds.
/// \param targetIsThumb whether the target is a thumb function; sets the low
///                      bit of pointer-like values and selects bl vs. blx.
void ArchHandler_arm::applyFixupFinal(const Reference &ref, uint8_t *loc,
                                      uint64_t fixupAddress,
                                      uint64_t targetAddress,
                                      uint64_t inAtomAddress,
                                      bool &thumbMode, bool targetIsThumb) {
  if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
    return;
  assert(ref.kindArch() == Reference::KindArch::ARM);
  ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc);
  int32_t displacement;
  uint16_t value16;
  uint32_t value32;
  switch (static_cast<ArmKind>(ref.kindValue())) {
  case modeThumbCode:
    thumbMode = true;
    break;
  case modeArmCode:
    thumbMode = false;
    break;
  case modeData:
    break;
  case thumb_b22:
  case thumb_bl22:
    assert(thumbMode);
    displacement = (targetAddress - (fixupAddress + 4)) + ref.addend();
    value32 = setDisplacementInThumbBranch(*loc32, fixupAddress,
                                           displacement, targetIsThumb);
    *loc32 = value32;
    break;
  case thumb_movw:
    assert(thumbMode);
    value16 = (targetAddress + ref.addend()) & 0xFFFF;
    if (targetIsThumb)
      value16 |= 1;
    *loc32 = setWordFromThumbMov(*loc32, value16);
    break;
  case thumb_movt:
    assert(thumbMode);
    value16 = (targetAddress + ref.addend()) >> 16;
    *loc32 = setWordFromThumbMov(*loc32, value16);
    break;
  case thumb_movw_funcRel:
    assert(thumbMode);
    value16 = (targetAddress - inAtomAddress + ref.addend()) & 0xFFFF;
    if (targetIsThumb)
      value16 |= 1;
    *loc32 = setWordFromThumbMov(*loc32, value16);
    break;
  case thumb_movt_funcRel:
    assert(thumbMode);
    value16 = (targetAddress - inAtomAddress + ref.addend()) >> 16;
    *loc32 = setWordFromThumbMov(*loc32, value16);
    break;
  case arm_b24:
  case arm_bl24:
   assert(!thumbMode);
    displacement = (targetAddress - (fixupAddress + 8)) + ref.addend();
    value32 = setDisplacementInArmBranch(*loc32, displacement, targetIsThumb);
    *loc32 = value32;
    break;
  case arm_movw:
    assert(!thumbMode);
    value16 = (targetAddress + ref.addend()) & 0xFFFF;
    if (targetIsThumb)
      value16 |= 1;
    *loc32 = setWordFromArmMov(*loc32, value16);
    break;
  case arm_movt:
    assert(!thumbMode);
    value16 = (targetAddress + ref.addend()) >> 16;
    *loc32 = setWordFromArmMov(*loc32, value16);
    break;
  case arm_movw_funcRel:
    assert(!thumbMode);
    value16 = (targetAddress - inAtomAddress + ref.addend()) & 0xFFFF;
    if (targetIsThumb)
      value16 |= 1;
    *loc32 = setWordFromArmMov(*loc32, value16);
    break;
  case arm_movt_funcRel:
    assert(!thumbMode);
    value16 = (targetAddress - inAtomAddress + ref.addend()) >> 16;
    *loc32 = setWordFromArmMov(*loc32, value16);
    break;
  case pointer32:
    if (targetIsThumb)
      *loc32 = targetAddress + ref.addend() + 1;
    else
      *loc32 = targetAddress + ref.addend();
    break;
  case delta32:
    if (targetIsThumb)
      *loc32 = targetAddress - fixupAddress + ref.addend() + 1;
    else
      *loc32 = targetAddress - fixupAddress + ref.addend();
    break;
  case lazyPointer:
    // do nothing
    break;
  case lazyImmediateLocation:
    *loc32 = ref.addend();
    break;
  case invalid:
    llvm_unreachable("invalid ARM Reference Kind");
    break;
  }
}

/// Produce the output bytes for \p atom.
///
/// Copies the atom's raw content into \p atomContentBuffer, then applies each
/// of the atom's references in iteration order, using the relocatable or the
/// final fixup routine depending on \p relocatable. The ARM/thumb mode flag
/// starts as ARM and is carried from one reference to the next.
/// \p findSectionAddress and \p imageBaseAddress are not used here.
void ArchHandler_arm::generateAtomContent(const DefinedAtom &atom,
                                          bool relocatable,
                                          FindAddressForAtom findAddress,
                                          FindAddressForAtom findSectionAddress,
                                          uint64_t imageBaseAddress,
                            llvm::MutableArrayRef<uint8_t> atomContentBuffer) {
  // Copy raw bytes.
  std::copy(atom.rawContent().begin(), atom.rawContent().end(),
            atomContentBuffer.begin());
  // Apply fix-ups.
  bool thumbMode = false;
  for (const Reference *ref : atom) {
    uint32_t offset = ref->offsetInAtom();
    const Atom *target = ref->target();
    // Targets that are not defined atoms keep address 0 and are treated as
    // non-thumb.
    uint64_t targetAddress = 0;
    bool targetIsThumb = false;
    if (const DefinedAtom *defTarg = dyn_cast<DefinedAtom>(target)) {
      targetAddress = findAddress(*target);
      targetIsThumb = isThumbFunction(*defTarg);
    }
    uint64_t atomAddress = findAddress(atom);
    uint64_t fixupAddress = atomAddress + offset;
    if (relocatable) {
      applyFixupRelocatable(*ref, &atomContentBuffer[offset], fixupAddress,
                            targetAddress, atomAddress, thumbMode,
                            targetIsThumb);
    } else {
      applyFixupFinal(*ref, &atomContentBuffer[offset], fixupAddress,
                      targetAddress, atomAddress, thumbMode, targetIsThumb);
    }
  }
}

/// Decide whether a reference to \p target is emitted as an external
/// (symbol-based) relocation rather than a section-based one.
bool ArchHandler_arm::useExternalRelocationTo(const Atom &target) {
  // Undefined symbols are referenced via external relocations.
  if (isa<UndefinedAtom>(&target))
    return true;
  if (const DefinedAtom *defAtom = dyn_cast<DefinedAtom>(&target)) {
     switch (defAtom->merge()) {
     case DefinedAtom::mergeAsTentative:
       // Tentative definitions are referenced via external relocations.
       return true;
     case DefinedAtom::mergeAsWeak:
     case DefinedAtom::mergeAsWeakAndAddressUsed:
       // Global weak-defs are referenced via external relocations.
       return (defAtom->scope() == DefinedAtom::scopeGlobal);
     default:
       break;
    }
  }
  // Everything else is reference via an internal relocation.
  return false;
}

/// Write the value for one reference into \p loc when producing a relocatable
/// object.
///
/// Parameters are as for applyFixupFinal(). Differences visible here: when
/// the target is referenced through an external relocation only the addend
/// is encoded (the target address is left to the relocation), the thumb bit
/// is not added to movw/pointer32/delta32 values, and lazyImmediateLocation
/// is left untouched.
void ArchHandler_arm::applyFixupRelocatable(const Reference &ref, uint8_t *loc,
                                            uint64_t fixupAddress,
                                            uint64_t targetAddress,
                                            uint64_t inAtomAddress,
                                            bool &thumbMode,
                                            bool targetIsThumb) {
  if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
    return;
  assert(ref.kindArch() == Reference::KindArch::ARM);
  bool useExternalReloc = useExternalRelocationTo(*ref.target());
  ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc);
  int32_t displacement;
  uint16_t value16;
  uint32_t value32;
  bool targetIsUndef = isa<UndefinedAtom>(ref.target());
  switch (static_cast<ArmKind>(ref.kindValue())) {
  case modeThumbCode:
    thumbMode = true;
    break;
  case modeArmCode:
    thumbMode = false;
    break;
  case modeData:
    break;
  case thumb_b22:
  case thumb_bl22:
    assert(thumbMode);
    if (useExternalReloc)
      displacement = (ref.addend() - (fixupAddress + 4));
    else
      displacement = (targetAddress - (fixupAddress + 4)) + ref.addend();
    // An undefined target is treated as thumb here, so a bl is not rewritten
    // to blx.
    value32 = setDisplacementInThumbBranch(*loc32, fixupAddress,
                                           displacement,
                                           targetIsUndef || targetIsThumb);
    *loc32 = value32;
    break;
  case thumb_movw:
    assert(thumbMode);
    if (useExternalReloc)
      value16 = ref.addend() & 0xFFFF;
    else
      value16 = (targetAddress + ref.addend()) & 0xFFFF;
    *loc32 = setWordFromThumbMov(*loc32, value16);
    break;
  case thumb_movt:
    assert(thumbMode);
    if (useExternalReloc)
      value16 = ref.addend() >> 16;
    else
      value16 = (targetAddress + ref.addend()) >> 16;
    *loc32 = setWordFromThumbMov(*loc32, value16);
    break;
  case thumb_movw_funcRel:
    assert(thumbMode);
    value16 = (targetAddress - inAtomAddress + ref.addend()) & 0xFFFF;
    *loc32 = setWordFromThumbMov(*loc32, value16);
    break;
  case thumb_movt_funcRel:
    assert(thumbMode);
    value16 = (targetAddress - inAtomAddress + ref.addend()) >> 16;
    *loc32 = setWordFromThumbMov(*loc32, value16);
    break;
  case arm_b24:
  case arm_bl24:
    assert(!thumbMode);
    if (useExternalReloc)
      displacement = (ref.addend() - (fixupAddress + 8));
    else
      displacement = (targetAddress - (fixupAddress + 8)) + ref.addend();
    value32 = setDisplacementInArmBranch(*loc32, displacement,
                                         targetIsThumb);
    *loc32 = value32;
    break;
  case arm_movw:
    assert(!thumbMode);
    if (useExternalReloc)
      value16 = ref.addend() & 0xFFFF;
    else
      value16 = (targetAddress + ref.addend()) & 0xFFFF;
    *loc32 = setWordFromArmMov(*loc32, value16);
    break;
  case arm_movt:
    assert(!thumbMode);
    if (useExternalReloc)
      value16 = ref.addend() >> 16;
    else
      value16 = (targetAddress + ref.addend()) >> 16;
    *loc32 = setWordFromArmMov(*loc32, value16);
    break;
  case arm_movw_funcRel:
    assert(!thumbMode);
    value16 = (targetAddress - inAtomAddress + ref.addend()) & 0xFFFF;
    *loc32 = setWordFromArmMov(*loc32, value16);
    break;
  case arm_movt_funcRel:
    assert(!thumbMode);
    value16 = (targetAddress - inAtomAddress + ref.addend()) >> 16;
    *loc32 = setWordFromArmMov(*loc32, value16);
    break;
  case pointer32:
    *loc32 = targetAddress + ref.addend();
    break;
  case delta32:
    *loc32 = targetAddress - fixupAddress + ref.addend();
    break;
  case lazyPointer:
  case lazyImmediateLocation:
    // do nothing
    break;
  case invalid:
    llvm_unreachable("invalid ARM Reference Kind");
    break;
  }
}

/// Append to \p relocs the relocation entries that describe \p ref.
///
/// Branch and pointer32 kinds emit one entry: external when
/// useExternalRelocationTo() says so, scattered when the addend is non-zero,
/// section-based otherwise. movw/movt kinds emit a HALF (or HALF_SECTDIFF)
/// entry followed by an ARM_RELOC_PAIR whose offset field carries the 16 bits
/// of the value that are not in the instruction. delta32 emits a SECTDIFF /
/// PAIR pair. Mode and lazy-binding kinds emit nothing.
///
/// \param atomSectionOffset   offset of \p atom within its section.
/// \param symbolIndexForAtom  maps a target atom to a symbol index.
/// \param sectionIndexForAtom maps a target atom to a section index.
/// \param addressForAtom      maps an atom to its address.
void ArchHandler_arm::appendSectionRelocations(
                                   const DefinedAtom &atom,
                                   uint64_t atomSectionOffset,
                                   const Reference &ref,
                                   FindSymbolIndexForAtom symbolIndexForAtom,
                                   FindSectionIndexForAtom sectionIndexForAtom,
                                   FindAddressForAtom addressForAtom,
                                   normalized::Relocations &relocs) {
  if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
    return;
  assert(ref.kindArch() == Reference::KindArch::ARM);
  uint32_t sectionOffset = atomSectionOffset + ref.offsetInAtom();
  bool useExternalReloc = useExternalRelocationTo(*ref.target());
  uint32_t targetAtomAddress;
  uint32_t fromAtomAddress;
  uint16_t other16;
  switch (static_cast<ArmKind>(ref.kindValue())) {
  case modeThumbCode:
  case modeArmCode:
  case modeData:
    // Do nothing.
    break;
  case thumb_b22:
  case thumb_bl22:
    if (useExternalReloc) {
      appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
                  ARM_THUMB_RELOC_BR22 | rExtern    | rPcRel | rLength4);
    } else {
      if (ref.addend() != 0)
        appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()),
                  ARM_THUMB_RELOC_BR22 | rScattered | rPcRel | rLength4);
      else
        appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0,
                  ARM_THUMB_RELOC_BR22 |              rPcRel | rLength4);
    }
    break;
  case thumb_movw:
    if (useExternalReloc) {
      other16 = ref.addend() >> 16;
      appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
                  ARM_RELOC_HALF | rExtern    | rLenThmbLo);
      appendReloc(relocs, other16, 0, 0,
                  ARM_RELOC_PAIR              | rLenThmbLo);
    } else {
      targetAtomAddress = addressForAtom(*ref.target());
      if (ref.addend() != 0) {
        other16 = (targetAtomAddress + ref.addend()) >> 16;
        appendReloc(relocs, sectionOffset, 0, targetAtomAddress,
                  ARM_RELOC_HALF | rScattered | rLenThmbLo);
        appendReloc(relocs, other16, 0, 0,
                  ARM_RELOC_PAIR              | rLenThmbLo);
      } else {
        other16 = (targetAtomAddress + ref.addend()) >> 16;
        appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0,
                  ARM_RELOC_HALF              | rLenThmbLo);
        appendReloc(relocs, other16, 0, 0,
                  ARM_RELOC_PAIR              | rLenThmbLo);
      }
    }
    break;
  case thumb_movt:
    if (useExternalReloc) {
      other16 = ref.addend() & 0xFFFF;
      appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
                  ARM_RELOC_HALF | rExtern    | rLenThmbHi);
      appendReloc(relocs, other16, 0, 0,
                  ARM_RELOC_PAIR              | rLenThmbHi);
    } else {
      targetAtomAddress = addressForAtom(*ref.target());
      if (ref.addend() != 0) {
        other16 = (targetAtomAddress + ref.addend()) & 0xFFFF;
        appendReloc(relocs, sectionOffset, 0, targetAtomAddress,
                    ARM_RELOC_HALF | rScattered | rLenThmbHi);
        appendReloc(relocs, other16, 0, 0,
                    ARM_RELOC_PAIR              | rLenThmbHi);
      } else {
        other16 = (targetAtomAddress + ref.addend()) & 0xFFFF;
        appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0,
                    ARM_RELOC_HALF              | rLenThmbHi);
        appendReloc(relocs, other16, 0, 0,
                    ARM_RELOC_PAIR              | rLenThmbHi);
      }
    }
    break;
  case thumb_movw_funcRel:
    fromAtomAddress = addressForAtom(atom);
    targetAtomAddress = addressForAtom(*ref.target());
    other16 = (targetAtomAddress - fromAtomAddress + ref.addend()) >> 16;
    appendReloc(relocs, sectionOffset, 0, targetAtomAddress,
                ARM_RELOC_HALF_SECTDIFF | rScattered | rLenThmbLo);
    appendReloc(relocs, other16, 0, fromAtomAddress,
                ARM_RELOC_PAIR          | rScattered | rLenThmbLo);
    break;
  case thumb_movt_funcRel:
    fromAtomAddress = addressForAtom(atom);
    targetAtomAddress = addressForAtom(*ref.target());
    other16 = (targetAtomAddress - fromAtomAddress + ref.addend()) & 0xFFFF;
    appendReloc(relocs, sectionOffset, 0, targetAtomAddress,
                ARM_RELOC_HALF_SECTDIFF | rScattered | rLenThmbHi);
    appendReloc(relocs, other16, 0, fromAtomAddress,
                ARM_RELOC_PAIR          | rScattered | rLenThmbHi);
    break;
  case arm_b24:
  case arm_bl24:
    if (useExternalReloc) {
      appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
                  ARM_RELOC_BR24 | rExtern    | rPcRel | rLength4);
    } else {
      if (ref.addend() != 0)
        appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()),
                  ARM_RELOC_BR24 | rScattered | rPcRel | rLength4);
      else
        appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0,
                  ARM_RELOC_BR24 |              rPcRel | rLength4);
    }
    break;
  case arm_movw:
    if (useExternalReloc) {
      other16 = ref.addend() >> 16;
      appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
                  ARM_RELOC_HALF | rExtern    | rLenArmLo);
      appendReloc(relocs, other16, 0, 0,
                  ARM_RELOC_PAIR              | rLenArmLo);
    } else {
      targetAtomAddress = addressForAtom(*ref.target());
      if (ref.addend() != 0) {
        other16 = (targetAtomAddress + ref.addend()) >> 16;
        appendReloc(relocs, sectionOffset, 0, targetAtomAddress,
                  ARM_RELOC_HALF | rScattered | rLenArmLo);
        appendReloc(relocs, other16, 0, 0,
                  ARM_RELOC_PAIR              | rLenArmLo);
      } else {
        other16 = (targetAtomAddress + ref.addend()) >> 16;
        appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0,
                  ARM_RELOC_HALF              | rLenArmLo);
        appendReloc(relocs, other16, 0, 0,
                  ARM_RELOC_PAIR              | rLenArmLo);
      }
    }
    break;
  case arm_movt:
    if (useExternalReloc) {
      other16 = ref.addend() & 0xFFFF;
      appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0,
                  ARM_RELOC_HALF | rExtern    | rLenArmHi);
      appendReloc(relocs, other16, 0, 0,
                  ARM_RELOC_PAIR              | rLenArmHi);
    } else {
      targetAtomAddress = addressForAtom(*ref.target());
      if (ref.addend() != 0) {
        other16 = (targetAtomAddress + ref.addend()) & 0xFFFF;
        appendReloc(relocs, sectionOffset, 0, targetAtomAddress,
                  ARM_RELOC_HALF | rScattered | rLenArmHi);
        appendReloc(relocs, other16, 0, 0,
                  ARM_RELOC_PAIR              | rLenArmHi);
      } else {
        other16 = (targetAtomAddress + ref.addend()) & 0xFFFF;
        appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0,
                  ARM_RELOC_HALF              | rLenArmHi);
        appendReloc(relocs, other16, 0, 0,
                  ARM_RELOC_PAIR              | rLenArmHi);
      }
    }
    break;
  case arm_movw_funcRel:
    fromAtomAddress = addressForAtom(atom);
    targetAtomAddress = addressForAtom(*ref.target());
    other16 = (targetAtomAddress - fromAtomAddress + ref.addend()) >> 16;
    appendReloc(relocs, sectionOffset, 0, targetAtomAddress,
                ARM_RELOC_HALF_SECTDIFF | rScattered | rLenArmLo);
    appendReloc(relocs, other16, 0, fromAtomAddress,
                ARM_RELOC_PAIR          | rScattered | rLenArmLo);
    break;
  case arm_movt_funcRel:
    fromAtomAddress = addressForAtom(atom);
    targetAtomAddress = addressForAtom(*ref.target());
    other16 = (targetAtomAddress - fromAtomAddress + ref.addend()) & 0xFFFF;
    appendReloc(relocs, sectionOffset, 0, targetAtomAddress,
                ARM_RELOC_HALF_SECTDIFF | rScattered | rLenArmHi);
    appendReloc(relocs, other16, 0, fromAtomAddress,
                ARM_RELOC_PAIR          | rScattered | rLenArmHi);
    break;
  case pointer32:
    if (useExternalReloc) {
      appendReloc(relocs, sectionOffset,  symbolIndexForAtom(*ref.target()), 0,
                ARM_RELOC_VANILLA |    rExtern     |  rLength4);
    }
    else {
      if (ref.addend() != 0)
        appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()),
                ARM_RELOC_VANILLA |    rScattered  |  rLength4);
      else
        appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0,
                ARM_RELOC_VANILLA |                   rLength4);
    }
    break;
  case delta32:
    // Minuend is the target atom; subtrahend is the fixup location itself.
    appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()),
              ARM_RELOC_SECTDIFF  |  rScattered    | rLength4);
    appendReloc(relocs, sectionOffset, 0, addressForAtom(atom) +
                                                           ref.offsetInAtom(),
              ARM_RELOC_PAIR      |  rScattered    | rLength4);
    break;
  case lazyPointer:
  case lazyImmediateLocation:
    // do nothing
    break;
  case invalid:
    llvm_unreachable("invalid ARM Reference Kind");
    break;
  }
}

/// For an atom marked as thumb, add a modeThumbCode reference to itself at
/// offset 0 so later passes can tell it contains thumb code.
void ArchHandler_arm::addAdditionalReferences(MachODefinedAtom &atom) {
  if (atom.isThumb()) {
    atom.addReference(Reference::KindNamespace::mach_o,
                      Reference::KindArch::ARM, modeThumbCode, 0, &atom, 0);
  }
}

/// Returns true when \p atom has a mach_o modeThumbCode reference at offset 0.
/// The scan stops at the first reference with a non-zero offset (presumably
/// references are iterated in offset order - TODO confirm).
bool ArchHandler_arm::isThumbFunction(const DefinedAtom &atom) {
  for (const Reference *ref : atom) {
    if (ref->offsetInAtom() != 0)
      return false;
    if (ref->kindNamespace() != Reference::KindNamespace::mach_o)
      continue;
    assert(ref->kindArch() == Reference::KindArch::ARM);
    if (ref->kindValue() == modeThumbCode)
      return true;
  }
  return false;
}

/// A 12-byte thumb code atom, named "<target>$shim", that transfers control to
/// \p target through a pc-relative offset stored in its last word (the
/// delta32 reference at offset 8).
class Thumb2ToArmShimAtom : public SimpleDefinedAtom {
public:
  Thumb2ToArmShimAtom(MachOFile &file, StringRef targetName,
                      const DefinedAtom &target)
      : SimpleDefinedAtom(file) {
    // Mark this atom as thumb code, then point the trailing word at target.
    addReference(Reference::KindNamespace::mach_o, Reference::KindArch::ARM,
                 ArchHandler_arm::modeThumbCode, 0, this, 0);
    addReference(Reference::KindNamespace::mach_o, Reference::KindArch::ARM,
                 ArchHandler_arm::delta32, 8, &target, 0);
    std::string name = std::string(targetName) + "$shim";
    StringRef tmp(name);
    // Copy the name into the file's allocator so it outlives this scope.
    _name = tmp.copy(file.allocator());
  }

  ~Thumb2ToArmShimAtom() override = default;

  StringRef name() const override {
    return _name;
  }

  ContentType contentType() const override {
    return DefinedAtom::typeCode;
  }

  Alignment alignment() const override { return 4; }

  uint64_t size() const override {
    return 12;
  }

  ContentPermissions permissions() const override {
    return DefinedAtom::permR_X;
  }

  ArrayRef<uint8_t> rawContent() const override {
    // NOTE(review): the halfword 0x44FF below appears to decode as
    // `add pc, pc`; `add ip, pc` would be 0x44FC (bytes 0xFC, 0x44).
    // TODO confirm against the ARM Architecture Reference Manual.
    static const uint8_t bytes[] =
    { 0xDF, 0xF8, 0x04, 0xC0,       //  ldr ip, [pc, #4]
      0xFF, 0x44,                   //  add ip, pc, ip
      0x60, 0x47,                   //  bx ip
      0x00, 0x00, 0x00, 0x00 };     //  .long target - this
    assert(sizeof(bytes) == size());
    return llvm::makeArrayRef(bytes, sizeof(bytes));
  }
private:
  StringRef _name;
};

/// A 16-byte ARM code atom, named "<target>$shim", that transfers control to
/// \p target through a pc-relative offset stored in its last word (the
/// delta32 reference at offset 12).
class ArmToThumbShimAtom : public SimpleDefinedAtom {
public:
  ArmToThumbShimAtom(MachOFile &file, StringRef targetName,
                     const DefinedAtom &target)
      : SimpleDefinedAtom(file) {
    addReference(Reference::KindNamespace::mach_o, Reference::KindArch::ARM,
                 ArchHandler_arm::delta32, 12, &target, 0);
    std::string name = std::string(targetName) + "$shim";
    StringRef tmp(name);
    // Copy the name into the file's allocator so it outlives this scope.
    _name = tmp.copy(file.allocator());
  }

  ~ArmToThumbShimAtom() override = default;

  StringRef name() const override {
    return _name;
  }

  ContentType contentType() const override {
    return DefinedAtom::typeCode;
  }

  Alignment alignment() const override { return 4; }

  uint64_t size() const override {
    return 16;
  }

  ContentPermissions permissions() const override {
    return DefinedAtom::permR_X;
  }

  ArrayRef<uint8_t> rawContent() const override {
    static const uint8_t bytes[] =
    { 0x04, 0xC0, 0x9F, 0xE5,       //  ldr ip, [pc, #4]
      0x0C, 0xC0, 0x8F, 0xE0,       //  add ip, pc, ip
      0x1C, 0xFF, 0x2F, 0xE1,       //  bx ip
      0x00, 0x00, 0x00, 0x00 };     //  .long target - this
    assert(sizeof(bytes) == size());
    return llvm::makeArrayRef(bytes, sizeof(bytes));
  }
private:
  StringRef _name;
};

/// Create a mode-switching shim for \p target, allocated from \p file's
/// allocator. When \p target is a stub, the shim is named after the symbol
/// the stub ultimately resolves to (see stubName()).
///
/// \param thumbToArm true to create a Thumb2ToArmShimAtom, false for an
///                   ArmToThumbShimAtom.
const DefinedAtom *ArchHandler_arm::createShim(MachOFile &file,
                                               bool thumbToArm,
                                               const DefinedAtom &target) {
  bool isStub = (target.contentType() == DefinedAtom::typeStub);
  StringRef targetName = isStub ? stubName(target) : target.name();
  if (thumbToArm)
    return new (file.allocator()) Thumb2ToArmShimAtom(file, targetName, target);
  else
    return new (file.allocator()) ArmToThumbShimAtom(file, targetName, target);
}

/// Factory for the ARM architecture handler.
std::unique_ptr<mach_o::ArchHandler> ArchHandler::create_arm() {
  return std::unique_ptr<mach_o::ArchHandler>(new ArchHandler_arm());
}

} // namespace mach_o
} // namespace lld
diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_arm64.cpp b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_arm64.cpp new file mode 100644 index 00000000000..a61f6aac05e --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_arm64.cpp @@ -0,0 +1,898 @@ +//===- lib/FileFormat/MachO/ArchHandler_arm64.cpp -------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "Atoms.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" + +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; + +namespace lld { +namespace mach_o { + +using llvm::support::ulittle32_t; +using llvm::support::ulittle64_t; + +using llvm::support::little32_t; +using llvm::support::little64_t; + +class ArchHandler_arm64 : public ArchHandler { +public: + ArchHandler_arm64() = default; + ~ArchHandler_arm64() override = default; + + const Registry::KindStrings *kindStrings() override { return _sKindStrings; } + + Reference::KindArch kindArch() override { + return Reference::KindArch::AArch64; + } + + /// Used by GOTPass to locate GOT References + bool isGOTAccess(const Reference &ref, bool &canBypassGOT) override { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return false; + assert(ref.kindArch() == Reference::KindArch::AArch64); + switch (ref.kindValue()) { + case gotPage21: + case gotOffset12: + canBypassGOT = true; + return true; + case delta32ToGOT: + canBypassGOT = false; + return true; + case unwindCIEToPersonalityFunction: + canBypassGOT = false; + return true; + case imageOffsetGot: + canBypassGOT = false; + return true; + default: + return false; + } + } + + /// Used by GOTPass to update GOT References. 
+ void updateReferenceToGOT(const Reference *ref, bool targetNowGOT) override { + // If GOT slot was instanciated, transform: + // gotPage21/gotOffset12 -> page21/offset12scale8 + // If GOT slot optimized away, transform: + // gotPage21/gotOffset12 -> page21/addOffset12 + assert(ref->kindNamespace() == Reference::KindNamespace::mach_o); + assert(ref->kindArch() == Reference::KindArch::AArch64); + switch (ref->kindValue()) { + case gotPage21: + const_cast<Reference *>(ref)->setKindValue(page21); + break; + case gotOffset12: + const_cast<Reference *>(ref)->setKindValue(targetNowGOT ? + offset12scale8 : addOffset12); + break; + case delta32ToGOT: + const_cast<Reference *>(ref)->setKindValue(delta32); + break; + case imageOffsetGot: + const_cast<Reference *>(ref)->setKindValue(imageOffset); + break; + default: + llvm_unreachable("Not a GOT reference"); + } + } + + const StubInfo &stubInfo() override { return _sStubInfo; } + + bool isCallSite(const Reference &) override; + bool isNonCallBranch(const Reference &) override { + return false; + } + + bool isPointer(const Reference &) override; + bool isPairedReloc(const normalized::Relocation &) override; + + bool needsCompactUnwind() override { + return true; + } + Reference::KindValue imageOffsetKind() override { + return imageOffset; + } + Reference::KindValue imageOffsetKindIndirect() override { + return imageOffsetGot; + } + + Reference::KindValue unwindRefToPersonalityFunctionKind() override { + return unwindCIEToPersonalityFunction; + } + + Reference::KindValue unwindRefToCIEKind() override { + return negDelta32; + } + + Reference::KindValue unwindRefToFunctionKind() override { + return unwindFDEToFunction; + } + + Reference::KindValue unwindRefToEhFrameKind() override { + return unwindInfoToEhFrame; + } + + Reference::KindValue pointerKind() override { + return pointer64; + } + + uint32_t dwarfCompactUnwindType() override { + return 0x03000000; + } + + llvm::Error getReferenceInfo(const normalized::Relocation 
&reloc, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool isBig, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) override; + llvm::Error + getPairReferenceInfo(const normalized::Relocation &reloc1, + const normalized::Relocation &reloc2, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool isBig, bool scatterable, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) override; + + bool needsLocalSymbolInRelocatableFile(const DefinedAtom *atom) override { + return (atom->contentType() == DefinedAtom::typeCString); + } + + void generateAtomContent(const DefinedAtom &atom, bool relocatable, + FindAddressForAtom findAddress, + FindAddressForAtom findSectionAddress, + uint64_t imageBaseAddress, + llvm::MutableArrayRef<uint8_t> atomContentBuffer) override; + + void appendSectionRelocations(const DefinedAtom &atom, + uint64_t atomSectionOffset, + const Reference &ref, + FindSymbolIndexForAtom symbolIndexForAtom, + FindSectionIndexForAtom sectionIndexForAtom, + FindAddressForAtom addressForAtom, + normalized::Relocations &relocs) override; + +private: + static const Registry::KindStrings _sKindStrings[]; + static const StubInfo _sStubInfo; + + enum Arm64Kind : Reference::KindValue { + invalid, /// for error condition + + // Kinds found in mach-o .o files: + branch26, /// ex: bl _foo + page21, /// ex: adrp x1, _foo@PAGE + offset12, /// ex: ldrb w0, [x1, _foo@PAGEOFF] + offset12scale2, /// ex: ldrs w0, [x1, _foo@PAGEOFF] + offset12scale4, /// ex: ldr w0, [x1, _foo@PAGEOFF] + offset12scale8, /// ex: ldr x0, [x1, _foo@PAGEOFF] + offset12scale16, /// ex: ldr q0, [x1, _foo@PAGEOFF] + gotPage21, /// ex: adrp x1, _foo@GOTPAGE + gotOffset12, /// ex: ldr w0, [x1, 
_foo@GOTPAGEOFF] + tlvPage21, /// ex: adrp x1, _foo@TLVPAGE + tlvOffset12, /// ex: ldr w0, [x1, _foo@TLVPAGEOFF] + + pointer64, /// ex: .quad _foo + delta64, /// ex: .quad _foo - . + delta32, /// ex: .long _foo - . + negDelta32, /// ex: .long . - _foo + pointer64ToGOT, /// ex: .quad _foo@GOT + delta32ToGOT, /// ex: .long _foo@GOT - . + + // Kinds introduced by Passes: + addOffset12, /// Location contains LDR to change into ADD. + lazyPointer, /// Location contains a lazy pointer. + lazyImmediateLocation, /// Location contains immediate value used in stub. + imageOffset, /// Location contains offset of atom in final image + imageOffsetGot, /// Location contains offset of GOT entry for atom in + /// final image (typically personality function). + unwindCIEToPersonalityFunction, /// Nearly delta32ToGOT, but cannot be + /// rematerialized in relocatable object + /// (yay for implicit contracts!). + unwindFDEToFunction, /// Nearly delta64, but cannot be rematerialized in + /// relocatable object (yay for implicit contracts!). + unwindInfoToEhFrame, /// Fix low 24 bits of compact unwind encoding to + /// refer to __eh_frame entry. + }; + + void applyFixupFinal(const Reference &ref, uint8_t *location, + uint64_t fixupAddress, uint64_t targetAddress, + uint64_t inAtomAddress, uint64_t imageBaseAddress, + FindAddressForAtom findSectionAddress); + + void applyFixupRelocatable(const Reference &ref, uint8_t *location, + uint64_t fixupAddress, uint64_t targetAddress, + uint64_t inAtomAddress, bool targetUnnamed); + + // Utility functions for inspecting/updating instructions. 
+ static uint32_t setDisplacementInBranch26(uint32_t instr, int32_t disp); + static uint32_t setDisplacementInADRP(uint32_t instr, int64_t disp); + static Arm64Kind offset12KindFromInstruction(uint32_t instr); + static uint32_t setImm12(uint32_t instr, uint32_t offset); +}; + +const Registry::KindStrings ArchHandler_arm64::_sKindStrings[] = { + LLD_KIND_STRING_ENTRY(invalid), + LLD_KIND_STRING_ENTRY(branch26), + LLD_KIND_STRING_ENTRY(page21), + LLD_KIND_STRING_ENTRY(offset12), + LLD_KIND_STRING_ENTRY(offset12scale2), + LLD_KIND_STRING_ENTRY(offset12scale4), + LLD_KIND_STRING_ENTRY(offset12scale8), + LLD_KIND_STRING_ENTRY(offset12scale16), + LLD_KIND_STRING_ENTRY(gotPage21), + LLD_KIND_STRING_ENTRY(gotOffset12), + LLD_KIND_STRING_ENTRY(tlvPage21), + LLD_KIND_STRING_ENTRY(tlvOffset12), + LLD_KIND_STRING_ENTRY(pointer64), + LLD_KIND_STRING_ENTRY(delta64), + LLD_KIND_STRING_ENTRY(delta32), + LLD_KIND_STRING_ENTRY(negDelta32), + LLD_KIND_STRING_ENTRY(pointer64ToGOT), + LLD_KIND_STRING_ENTRY(delta32ToGOT), + + LLD_KIND_STRING_ENTRY(addOffset12), + LLD_KIND_STRING_ENTRY(lazyPointer), + LLD_KIND_STRING_ENTRY(lazyImmediateLocation), + LLD_KIND_STRING_ENTRY(imageOffset), + LLD_KIND_STRING_ENTRY(imageOffsetGot), + LLD_KIND_STRING_ENTRY(unwindCIEToPersonalityFunction), + LLD_KIND_STRING_ENTRY(unwindFDEToFunction), + LLD_KIND_STRING_ENTRY(unwindInfoToEhFrame), + + LLD_KIND_STRING_END +}; + +const ArchHandler::StubInfo ArchHandler_arm64::_sStubInfo = { + "dyld_stub_binder", + + // Lazy pointer references + { Reference::KindArch::AArch64, pointer64, 0, 0 }, + { Reference::KindArch::AArch64, lazyPointer, 0, 0 }, + + // GOT pointer to dyld_stub_binder + { Reference::KindArch::AArch64, pointer64, 0, 0 }, + + // arm64 code alignment 2^2 + 2, + + // Stub size and code + 12, + { 0x10, 0x00, 0x00, 0x90, // ADRP X16, lazy_pointer@page + 0x10, 0x02, 0x40, 0xF9, // LDR X16, [X16, lazy_pointer@pageoff] + 0x00, 0x02, 0x1F, 0xD6 }, // BR X16 + { Reference::KindArch::AArch64, page21, 0, 0 }, + 
{ true, offset12scale8, 4, 0 }, + + // Stub Helper size and code + 12, + { 0x50, 0x00, 0x00, 0x18, // LDR W16, L0 + 0x00, 0x00, 0x00, 0x14, // LDR B helperhelper + 0x00, 0x00, 0x00, 0x00 }, // L0: .long 0 + { Reference::KindArch::AArch64, lazyImmediateLocation, 8, 0 }, + { Reference::KindArch::AArch64, branch26, 4, 0 }, + + // Stub helper image cache content type + DefinedAtom::typeGOT, + + // Stub Helper-Common size and code + 24, + // Stub helper alignment + 2, + { 0x11, 0x00, 0x00, 0x90, // ADRP X17, dyld_ImageLoaderCache@page + 0x31, 0x02, 0x00, 0x91, // ADD X17, X17, dyld_ImageLoaderCache@pageoff + 0xF0, 0x47, 0xBF, 0xA9, // STP X16/X17, [SP, #-16]! + 0x10, 0x00, 0x00, 0x90, // ADRP X16, _fast_lazy_bind@page + 0x10, 0x02, 0x40, 0xF9, // LDR X16, [X16,_fast_lazy_bind@pageoff] + 0x00, 0x02, 0x1F, 0xD6 }, // BR X16 + { Reference::KindArch::AArch64, page21, 0, 0 }, + { true, offset12, 4, 0 }, + { Reference::KindArch::AArch64, page21, 12, 0 }, + { true, offset12scale8, 16, 0 } +}; + +bool ArchHandler_arm64::isCallSite(const Reference &ref) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return false; + assert(ref.kindArch() == Reference::KindArch::AArch64); + return (ref.kindValue() == branch26); +} + +bool ArchHandler_arm64::isPointer(const Reference &ref) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return false; + assert(ref.kindArch() == Reference::KindArch::AArch64); + Reference::KindValue kind = ref.kindValue(); + return (kind == pointer64); +} + +bool ArchHandler_arm64::isPairedReloc(const Relocation &r) { + return ((r.type == ARM64_RELOC_ADDEND) || (r.type == ARM64_RELOC_SUBTRACTOR)); +} + +uint32_t ArchHandler_arm64::setDisplacementInBranch26(uint32_t instr, + int32_t displacement) { + assert((displacement <= 134217727) && (displacement > (-134217728)) && + "arm64 branch out of range"); + return (instr & 0xFC000000) | ((uint32_t)(displacement >> 2) & 0x03FFFFFF); +} + +uint32_t 
ArchHandler_arm64::setDisplacementInADRP(uint32_t instruction, + int64_t displacement) { + assert((displacement <= 0x100000000LL) && (displacement > (-0x100000000LL)) && + "arm64 ADRP out of range"); + assert(((instruction & 0x9F000000) == 0x90000000) && + "reloc not on ADRP instruction"); + uint32_t immhi = (displacement >> 9) & (0x00FFFFE0); + uint32_t immlo = (displacement << 17) & (0x60000000); + return (instruction & 0x9F00001F) | immlo | immhi; +} + +ArchHandler_arm64::Arm64Kind +ArchHandler_arm64::offset12KindFromInstruction(uint32_t instruction) { + if (instruction & 0x08000000) { + switch ((instruction >> 30) & 0x3) { + case 0: + if ((instruction & 0x04800000) == 0x04800000) + return offset12scale16; + return offset12; + case 1: + return offset12scale2; + case 2: + return offset12scale4; + case 3: + return offset12scale8; + } + } + return offset12; +} + +uint32_t ArchHandler_arm64::setImm12(uint32_t instruction, uint32_t offset) { + assert(((offset & 0xFFFFF000) == 0) && "imm12 offset out of range"); + uint32_t imm12 = offset << 10; + return (instruction & 0xFFC003FF) | imm12; +} + +llvm::Error ArchHandler_arm64::getReferenceInfo( + const Relocation &reloc, const DefinedAtom *inAtom, uint32_t offsetInAtom, + uint64_t fixupAddress, bool isBig, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, Reference::KindValue *kind, + const lld::Atom **target, Reference::Addend *addend) { + const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; + switch (relocPattern(reloc)) { + case ARM64_RELOC_BRANCH26 | rPcRel | rExtern | rLength4: + // ex: bl _foo + *kind = branch26; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return llvm::Error(); + case ARM64_RELOC_PAGE21 | rPcRel | rExtern | rLength4: + // ex: adrp x1, _foo@PAGE + *kind = page21; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return llvm::Error(); + case 
ARM64_RELOC_PAGEOFF12 | rExtern | rLength4: + // ex: ldr x0, [x1, _foo@PAGEOFF] + *kind = offset12KindFromInstruction(*(const little32_t *)fixupContent); + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return llvm::Error(); + case ARM64_RELOC_GOT_LOAD_PAGE21 | rPcRel | rExtern | rLength4: + // ex: adrp x1, _foo@GOTPAGE + *kind = gotPage21; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return llvm::Error(); + case ARM64_RELOC_GOT_LOAD_PAGEOFF12 | rExtern | rLength4: + // ex: ldr x0, [x1, _foo@GOTPAGEOFF] + *kind = gotOffset12; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return llvm::Error(); + case ARM64_RELOC_TLVP_LOAD_PAGE21 | rPcRel | rExtern | rLength4: + // ex: adrp x1, _foo@TLVPAGE + *kind = tlvPage21; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return llvm::Error(); + case ARM64_RELOC_TLVP_LOAD_PAGEOFF12 | rExtern | rLength4: + // ex: ldr x0, [x1, _foo@TLVPAGEOFF] + *kind = tlvOffset12; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return llvm::Error(); + case ARM64_RELOC_UNSIGNED | rExtern | rLength8: + // ex: .quad _foo + N + *kind = pointer64; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = *(const little64_t *)fixupContent; + return llvm::Error(); + case ARM64_RELOC_UNSIGNED | rLength8: + // ex: .quad Lfoo + N + *kind = pointer64; + return atomFromAddress(reloc.symbol, *(const little64_t *)fixupContent, + target, addend); + case ARM64_RELOC_POINTER_TO_GOT | rExtern | rLength8: + // ex: .quad _foo@GOT + *kind = pointer64ToGOT; + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return llvm::Error(); + case ARM64_RELOC_POINTER_TO_GOT | rPcRel | rExtern | rLength4: + // ex: .long _foo@GOT - . 
+ + // If we are in an .eh_frame section, then the kind of the relocation should + // not be delta32ToGOT. It may instead be unwindCIEToPersonalityFunction. + if (inAtom->contentType() == DefinedAtom::typeCFI) + *kind = unwindCIEToPersonalityFunction; + else + *kind = delta32ToGOT; + + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = 0; + return llvm::Error(); + default: + return llvm::make_error<GenericError>("unsupported arm64 relocation type"); + } +} + +llvm::Error ArchHandler_arm64::getPairReferenceInfo( + const normalized::Relocation &reloc1, const normalized::Relocation &reloc2, + const DefinedAtom *inAtom, uint32_t offsetInAtom, uint64_t fixupAddress, + bool swap, bool scatterable, FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, Reference::KindValue *kind, + const lld::Atom **target, Reference::Addend *addend) { + const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; + switch (relocPattern(reloc1) << 16 | relocPattern(reloc2)) { + case ((ARM64_RELOC_ADDEND | rLength4) << 16 | + ARM64_RELOC_BRANCH26 | rPcRel | rExtern | rLength4): + // ex: bl _foo+8 + *kind = branch26; + if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) + return ec; + *addend = reloc1.symbol; + return llvm::Error(); + case ((ARM64_RELOC_ADDEND | rLength4) << 16 | + ARM64_RELOC_PAGE21 | rPcRel | rExtern | rLength4): + // ex: adrp x1, _foo@PAGE + *kind = page21; + if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) + return ec; + *addend = reloc1.symbol; + return llvm::Error(); + case ((ARM64_RELOC_ADDEND | rLength4) << 16 | + ARM64_RELOC_PAGEOFF12 | rExtern | rLength4): { + // ex: ldr w0, [x1, _foo@PAGEOFF] + uint32_t cont32 = (int32_t)*(const little32_t *)fixupContent; + *kind = offset12KindFromInstruction(cont32); + if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) + return ec; + *addend = reloc1.symbol; + return llvm::Error(); + } + case ((ARM64_RELOC_SUBTRACTOR | rExtern | 
rLength8) << 16 | + ARM64_RELOC_UNSIGNED | rExtern | rLength8): + // ex: .quad _foo - . + if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) + return ec; + + // If we are in an .eh_frame section, then the kind of the relocation should + // not be delta64. It may instead be unwindFDEToFunction. + if (inAtom->contentType() == DefinedAtom::typeCFI) + *kind = unwindFDEToFunction; + else + *kind = delta64; + + // The offsets of the 2 relocations must match + if (reloc1.offset != reloc2.offset) + return llvm::make_error<GenericError>( + "paired relocs must have the same offset"); + *addend = (int64_t)*(const little64_t *)fixupContent + offsetInAtom; + return llvm::Error(); + case ((ARM64_RELOC_SUBTRACTOR | rExtern | rLength4) << 16 | + ARM64_RELOC_UNSIGNED | rExtern | rLength4): + // ex: .quad _foo - . + *kind = delta32; + if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) + return ec; + *addend = (int32_t)*(const little32_t *)fixupContent + offsetInAtom; + return llvm::Error(); + default: + return llvm::make_error<GenericError>("unsupported arm64 relocation pair"); + } +} + +void ArchHandler_arm64::generateAtomContent( + const DefinedAtom &atom, bool relocatable, FindAddressForAtom findAddress, + FindAddressForAtom findSectionAddress, uint64_t imageBaseAddress, + llvm::MutableArrayRef<uint8_t> atomContentBuffer) { + // Copy raw bytes. + std::copy(atom.rawContent().begin(), atom.rawContent().end(), + atomContentBuffer.begin()); + // Apply fix-ups. 
+#ifndef NDEBUG + if (atom.begin() != atom.end()) { + DEBUG_WITH_TYPE("atom-content", llvm::dbgs() + << "Applying fixups to atom:\n" + << " address=" + << llvm::format(" 0x%09lX", &atom) + << ", file=#" + << atom.file().ordinal() + << ", atom=#" + << atom.ordinal() + << ", name=" + << atom.name() + << ", type=" + << atom.contentType() + << "\n"); + } +#endif + for (const Reference *ref : atom) { + uint32_t offset = ref->offsetInAtom(); + const Atom *target = ref->target(); + bool targetUnnamed = target->name().empty(); + uint64_t targetAddress = 0; + if (isa<DefinedAtom>(target)) + targetAddress = findAddress(*target); + uint64_t atomAddress = findAddress(atom); + uint64_t fixupAddress = atomAddress + offset; + if (relocatable) { + applyFixupRelocatable(*ref, &atomContentBuffer[offset], fixupAddress, + targetAddress, atomAddress, targetUnnamed); + } else { + applyFixupFinal(*ref, &atomContentBuffer[offset], fixupAddress, + targetAddress, atomAddress, imageBaseAddress, + findSectionAddress); + } + } +} + +void ArchHandler_arm64::applyFixupFinal(const Reference &ref, uint8_t *loc, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress, + uint64_t imageBaseAddress, + FindAddressForAtom findSectionAddress) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::AArch64); + ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc); + ulittle64_t *loc64 = reinterpret_cast<ulittle64_t *>(loc); + int32_t displacement; + uint32_t instruction; + uint32_t value32; + uint32_t value64; + switch (static_cast<Arm64Kind>(ref.kindValue())) { + case branch26: + displacement = (targetAddress - fixupAddress) + ref.addend(); + *loc32 = setDisplacementInBranch26(*loc32, displacement); + return; + case page21: + case gotPage21: + case tlvPage21: + displacement = + ((targetAddress + ref.addend()) & (-4096)) - (fixupAddress & (-4096)); + *loc32 = setDisplacementInADRP(*loc32, displacement); + return; + 
case offset12: + case gotOffset12: + case tlvOffset12: + displacement = (targetAddress + ref.addend()) & 0x00000FFF; + *loc32 = setImm12(*loc32, displacement); + return; + case offset12scale2: + displacement = (targetAddress + ref.addend()) & 0x00000FFF; + assert(((displacement & 0x1) == 0) && + "scaled imm12 not accessing 2-byte aligneds"); + *loc32 = setImm12(*loc32, displacement >> 1); + return; + case offset12scale4: + displacement = (targetAddress + ref.addend()) & 0x00000FFF; + assert(((displacement & 0x3) == 0) && + "scaled imm12 not accessing 4-byte aligned"); + *loc32 = setImm12(*loc32, displacement >> 2); + return; + case offset12scale8: + displacement = (targetAddress + ref.addend()) & 0x00000FFF; + assert(((displacement & 0x7) == 0) && + "scaled imm12 not accessing 8-byte aligned"); + *loc32 = setImm12(*loc32, displacement >> 3); + return; + case offset12scale16: + displacement = (targetAddress + ref.addend()) & 0x00000FFF; + assert(((displacement & 0xF) == 0) && + "scaled imm12 not accessing 16-byte aligned"); + *loc32 = setImm12(*loc32, displacement >> 4); + return; + case addOffset12: + instruction = *loc32; + assert(((instruction & 0xFFC00000) == 0xF9400000) && + "GOT reloc is not an LDR instruction"); + displacement = (targetAddress + ref.addend()) & 0x00000FFF; + value32 = 0x91000000 | (instruction & 0x000003FF); + instruction = setImm12(value32, displacement); + *loc32 = instruction; + return; + case pointer64: + case pointer64ToGOT: + *loc64 = targetAddress + ref.addend(); + return; + case delta64: + case unwindFDEToFunction: + *loc64 = (targetAddress - fixupAddress) + ref.addend(); + return; + case delta32: + case delta32ToGOT: + case unwindCIEToPersonalityFunction: + *loc32 = (targetAddress - fixupAddress) + ref.addend(); + return; + case negDelta32: + *loc32 = fixupAddress - targetAddress + ref.addend(); + return; + case lazyPointer: + // Do nothing + return; + case lazyImmediateLocation: + *loc32 = ref.addend(); + return; + case imageOffset: 
+ *loc32 = (targetAddress - imageBaseAddress) + ref.addend(); + return; + case imageOffsetGot: + llvm_unreachable("imageOffsetGot should have been changed to imageOffset"); + break; + case unwindInfoToEhFrame: + value64 = targetAddress - findSectionAddress(*ref.target()) + ref.addend(); + assert(value64 < 0xffffffU && "offset in __eh_frame too large"); + *loc32 = (*loc32 & 0xff000000U) | value64; + return; + case invalid: + // Fall into llvm_unreachable(). + break; + } + llvm_unreachable("invalid arm64 Reference Kind"); +} + +void ArchHandler_arm64::applyFixupRelocatable(const Reference &ref, + uint8_t *loc, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress, + bool targetUnnamed) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::AArch64); + ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc); + ulittle64_t *loc64 = reinterpret_cast<ulittle64_t *>(loc); + switch (static_cast<Arm64Kind>(ref.kindValue())) { + case branch26: + *loc32 = setDisplacementInBranch26(*loc32, 0); + return; + case page21: + case gotPage21: + case tlvPage21: + *loc32 = setDisplacementInADRP(*loc32, 0); + return; + case offset12: + case offset12scale2: + case offset12scale4: + case offset12scale8: + case offset12scale16: + case gotOffset12: + case tlvOffset12: + *loc32 = setImm12(*loc32, 0); + return; + case pointer64: + if (targetUnnamed) + *loc64 = targetAddress + ref.addend(); + else + *loc64 = ref.addend(); + return; + case delta64: + *loc64 = ref.addend() + inAtomAddress - fixupAddress; + return; + case unwindFDEToFunction: + // We don't emit unwindFDEToFunction in -r mode as they are implicitly + // generated from the data in the __eh_frame section. So here we need + // to use the targetAddress so that we can generate the full relocation + // when we parse again later. 
+ *loc64 = targetAddress - fixupAddress; + return; + case delta32: + *loc32 = ref.addend() + inAtomAddress - fixupAddress; + return; + case negDelta32: + // We don't emit negDelta32 in -r mode as they are implicitly + // generated from the data in the __eh_frame section. So here we need + // to use the targetAddress so that we can generate the full relocation + // when we parse again later. + *loc32 = fixupAddress - targetAddress + ref.addend(); + return; + case pointer64ToGOT: + *loc64 = 0; + return; + case delta32ToGOT: + *loc32 = inAtomAddress - fixupAddress; + return; + case unwindCIEToPersonalityFunction: + // We don't emit unwindCIEToPersonalityFunction in -r mode as they are + // implicitly generated from the data in the __eh_frame section. So here we + // need to use the targetAddress so that we can generate the full relocation + // when we parse again later. + *loc32 = targetAddress - fixupAddress; + return; + case addOffset12: + llvm_unreachable("lazy reference kind implies GOT pass was run"); + case lazyPointer: + case lazyImmediateLocation: + llvm_unreachable("lazy reference kind implies Stubs pass was run"); + case imageOffset: + case imageOffsetGot: + case unwindInfoToEhFrame: + llvm_unreachable("fixup implies __unwind_info"); + return; + case invalid: + // Fall into llvm_unreachable(). 
+ break; + } + llvm_unreachable("unknown arm64 Reference Kind"); +} + +void ArchHandler_arm64::appendSectionRelocations( + const DefinedAtom &atom, uint64_t atomSectionOffset, const Reference &ref, + FindSymbolIndexForAtom symbolIndexForAtom, + FindSectionIndexForAtom sectionIndexForAtom, + FindAddressForAtom addressForAtom, normalized::Relocations &relocs) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::AArch64); + uint32_t sectionOffset = atomSectionOffset + ref.offsetInAtom(); + switch (static_cast<Arm64Kind>(ref.kindValue())) { + case branch26: + if (ref.addend()) { + appendReloc(relocs, sectionOffset, ref.addend(), 0, + ARM64_RELOC_ADDEND | rLength4); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_BRANCH26 | rPcRel | rExtern | rLength4); + } else { + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_BRANCH26 | rPcRel | rExtern | rLength4); + } + return; + case page21: + if (ref.addend()) { + appendReloc(relocs, sectionOffset, ref.addend(), 0, + ARM64_RELOC_ADDEND | rLength4); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_PAGE21 | rPcRel | rExtern | rLength4); + } else { + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_PAGE21 | rPcRel | rExtern | rLength4); + } + return; + case offset12: + case offset12scale2: + case offset12scale4: + case offset12scale8: + case offset12scale16: + if (ref.addend()) { + appendReloc(relocs, sectionOffset, ref.addend(), 0, + ARM64_RELOC_ADDEND | rLength4); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_PAGEOFF12 | rExtern | rLength4); + } else { + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_PAGEOFF12 | rExtern | rLength4); + } + return; + case gotPage21: + assert(ref.addend() == 0); + appendReloc(relocs, 
sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_GOT_LOAD_PAGE21 | rPcRel | rExtern | rLength4); + return; + case gotOffset12: + assert(ref.addend() == 0); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_GOT_LOAD_PAGEOFF12 | rExtern | rLength4); + return; + case tlvPage21: + assert(ref.addend() == 0); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_TLVP_LOAD_PAGE21 | rPcRel | rExtern | rLength4); + return; + case tlvOffset12: + assert(ref.addend() == 0); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_TLVP_LOAD_PAGEOFF12 | rExtern | rLength4); + return; + case pointer64: + if (ref.target()->name().empty()) + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, + ARM64_RELOC_UNSIGNED | rLength8); + else + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_UNSIGNED | rExtern | rLength8); + return; + case delta64: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, + ARM64_RELOC_SUBTRACTOR | rExtern | rLength8); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_UNSIGNED | rExtern | rLength8); + return; + case delta32: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, + ARM64_RELOC_SUBTRACTOR | rExtern | rLength4 ); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_UNSIGNED | rExtern | rLength4 ); + return; + case pointer64ToGOT: + assert(ref.addend() == 0); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_POINTER_TO_GOT | rExtern | rLength8); + return; + case delta32ToGOT: + assert(ref.addend() == 0); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + ARM64_RELOC_POINTER_TO_GOT | rPcRel | rExtern | rLength4); + return; + case addOffset12: + llvm_unreachable("lazy reference kind implies GOT pass was 
run"); + case lazyPointer: + case lazyImmediateLocation: + llvm_unreachable("lazy reference kind implies Stubs pass was run"); + case imageOffset: + case imageOffsetGot: + llvm_unreachable("deltas from mach_header can only be in final images"); + case unwindCIEToPersonalityFunction: + case unwindFDEToFunction: + case unwindInfoToEhFrame: + case negDelta32: + // Do nothing. + return; + case invalid: + // Fall into llvm_unreachable(). + break; + } + llvm_unreachable("unknown arm64 Reference Kind"); +} + +std::unique_ptr<mach_o::ArchHandler> ArchHandler::create_arm64() { + return std::unique_ptr<mach_o::ArchHandler>(new ArchHandler_arm64()); +} + +} // namespace mach_o +} // namespace lld diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_x86.cpp b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_x86.cpp new file mode 100644 index 00000000000..15f1f793b5d --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_x86.cpp @@ -0,0 +1,647 @@ +//===- lib/FileFormat/MachO/ArchHandler_x86.cpp ---------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
/// ArchHandler implementation for 32-bit x86 mach-o files.
///
/// The small accessors are defined inline; the relocation parsing, fixup
/// application and relocation emission members are only declared here and
/// are defined out of line.
class ArchHandler_x86 : public ArchHandler {
public:
  ArchHandler_x86() = default;
  ~ArchHandler_x86() override = default;

  /// Returns the name table for the X86Kind values.
  const Registry::KindStrings *kindStrings() override { return _sKindStrings; }

  Reference::KindArch kindArch() override { return Reference::KindArch::x86; }

  /// Returns the static description of the x86 stub layout.
  const StubInfo &stubInfo() override { return _sStubInfo; }
  bool isCallSite(const Reference &) override;
  /// Always false for this handler.
  bool isNonCallBranch(const Reference &) override {
    return false;
  }

  bool isPointer(const Reference &) override;
  bool isPairedReloc(const normalized::Relocation &) override;

  /// Always false for this handler.
  bool needsCompactUnwind() override {
    return false;
  }

  // The following kind queries return `invalid` where this handler provides
  // no corresponding reference kind.
  Reference::KindValue imageOffsetKind() override {
    return invalid;
  }

  Reference::KindValue imageOffsetKindIndirect() override {
    return invalid;
  }

  Reference::KindValue unwindRefToPersonalityFunctionKind() override {
    return invalid;
  }

  Reference::KindValue unwindRefToCIEKind() override {
    return negDelta32;
  }

  Reference::KindValue unwindRefToFunctionKind() override{
    return delta32;
  }

  Reference::KindValue unwindRefToEhFrameKind() override {
    return invalid;
  }

  Reference::KindValue pointerKind() override {
    return invalid;
  }

  uint32_t dwarfCompactUnwindType() override {
    return 0x04000000U;
  }

  /// Decodes a single relocation into a reference kind, target atom and
  /// addend (written through \p kind, \p target and \p addend).
  llvm::Error getReferenceInfo(const normalized::Relocation &reloc,
                               const DefinedAtom *inAtom,
                               uint32_t offsetInAtom,
                               uint64_t fixupAddress, bool swap,
                               FindAtomBySectionAndAddress atomFromAddress,
                               FindAtomBySymbolIndex atomFromSymbolIndex,
                               Reference::KindValue *kind,
                               const lld::Atom **target,
                               Reference::Addend *addend) override;
  /// Decodes a pair of relocations (\p reloc1 followed by \p reloc2) into a
  /// single reference kind, target atom and addend.
  llvm::Error
      getPairReferenceInfo(const normalized::Relocation &reloc1,
                           const normalized::Relocation &reloc2,
                           const DefinedAtom *inAtom,
                           uint32_t offsetInAtom,
                           uint64_t fixupAddress, bool swap, bool scatterable,
                           FindAtomBySectionAndAddress atomFromAddress,
                           FindAtomBySymbolIndex atomFromSymbolIndex,
                           Reference::KindValue *kind,
                           const lld::Atom **target,
                           Reference::Addend *addend) override;

  /// Writes the content of \p atom, with its references applied, into
  /// \p atomContentBuffer.
  void generateAtomContent(const DefinedAtom &atom, bool relocatable,
                           FindAddressForAtom findAddress,
                           FindAddressForAtom findSectionAddress,
                           uint64_t imageBaseAddress,
                    llvm::MutableArrayRef<uint8_t> atomContentBuffer) override;

  /// Appends to \p relocs the relocation record(s) describing \p ref.
  void appendSectionRelocations(const DefinedAtom &atom,
                                uint64_t atomSectionOffset,
                                const Reference &ref,
                                FindSymbolIndexForAtom symbolIndexForAtom,
                                FindSectionIndexForAtom sectionIndexForAtom,
                                FindAddressForAtom addressForAtom,
                                normalized::Relocations &relocs) override;

  /// True only for the modeCode and modeData kinds.
  bool isDataInCodeTransition(Reference::KindValue refKind) override {
    switch (refKind) {
    case modeCode:
    case modeData:
      return true;
    default:
      return false;
      break;
    }
  }

  /// Kind that marks the start of a data-in-code range (modeData).
  Reference::KindValue dataInCodeTransitionStart(
                                        const MachODefinedAtom &atom) override {
    return modeData;
  }

  /// Kind that marks the end of a data-in-code range (modeCode).
  Reference::KindValue dataInCodeTransitionEnd(
                                        const MachODefinedAtom &atom) override {
    return modeCode;
  }

private:
  static const Registry::KindStrings _sKindStrings[];
  static const StubInfo              _sStubInfo;

  /// Reference kinds used by this handler.
  enum X86Kind : Reference::KindValue {
    invalid,               /// for error condition

    modeCode,              /// Content starting at this offset is code.
    modeData,              /// Content starting at this offset is data.

    // Kinds found in mach-o .o files:
    branch32,              /// ex: call _foo
    branch16,              /// ex: callw _foo
    abs32,                 /// ex: movl _foo, %eax
    funcRel32,             /// ex: movl _foo-L1(%eax), %eax
    pointer32,             /// ex: .long _foo
    delta32,               /// ex: .long _foo - .
    negDelta32,            /// ex: .long . - _foo

    // Kinds introduced by Passes:
    lazyPointer,           /// Location contains a lazy pointer.
    lazyImmediateLocation, /// Location contains immediate value used in stub.
  };

  /// Decides whether a reference to \p target is emitted as an external
  /// (symbol-based) relocation.
  static bool useExternalRelocationTo(const Atom &target);

  /// Applies \p ref at \p location; used by generateAtomContent() when the
  /// output is not relocatable.
  void applyFixupFinal(const Reference &ref, uint8_t *location,
                       uint64_t fixupAddress, uint64_t targetAddress,
                       uint64_t inAtomAddress);

  /// Applies \p ref at \p location; used by generateAtomContent() when the
  /// output is relocatable.
  void applyFixupRelocatable(const Reference &ref, uint8_t *location,
                             uint64_t fixupAddress,
                             uint64_t targetAddress,
                             uint64_t inAtomAddress);
};
/// Decodes one i386 relocation into a reference description.
///
/// The relocation's pattern (type plus pc-rel / extern / scattered / length
/// flags, as produced by relocPattern()) selects the case. Each supported
/// case stores the reference kind in \p kind and resolves \p target and
/// \p addend either from the symbol index (external relocations) or from an
/// address computed from the bytes at the fixup location.
///
/// \param reloc         relocation to decode.
/// \param inAtom        atom whose content contains the fixup.
/// \param offsetInAtom  offset of the fixup within \p inAtom's raw content.
/// \param fixupAddress  address of the fixup location.
/// \param swap          not used by this implementation.
/// \param atomFromAddress      resolves (section, address) to atom + offset.
/// \param atomFromSymbolIndex  resolves a symbol index to an atom.
/// \returns success, an error from one of the lookup callbacks, or a
///          GenericError for an unsupported relocation pattern.
llvm::Error
ArchHandler_x86::getReferenceInfo(const Relocation &reloc,
                                  const DefinedAtom *inAtom,
                                  uint32_t offsetInAtom,
                                  uint64_t fixupAddress, bool swap,
                                  FindAtomBySectionAndAddress atomFromAddress,
                                  FindAtomBySymbolIndex atomFromSymbolIndex,
                                  Reference::KindValue *kind,
                                  const lld::Atom **target,
                                  Reference::Addend *addend) {
  DefinedAtom::ContentPermissions perms;
  // Pointer to the bytes being fixed up; they hold the value stored in the
  // instruction or data at the fixup location.
  const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom];
  uint64_t targetAddress;
  switch (relocPattern(reloc)) {
  case GENERIC_RELOC_VANILLA | rPcRel | rExtern | rLength4:
    // ex: call _foo (and _foo undefined)
    *kind = branch32;
    if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
      return ec;
    // Stored displacement plus the address just past the 4-byte field.
    *addend = fixupAddress + 4 + (int32_t)*(const little32_t *)fixupContent;
    break;
  case GENERIC_RELOC_VANILLA | rPcRel | rLength4:
    // ex: call _foo (and _foo defined)
    *kind = branch32;
    targetAddress =
        fixupAddress + 4 + (int32_t) * (const little32_t *)fixupContent;
    // The callback fills in both target and addend for this address.
    return atomFromAddress(reloc.symbol, targetAddress, target, addend);
    break;
  case GENERIC_RELOC_VANILLA | rScattered | rPcRel | rLength4:
    // ex: call _foo+n (and _foo defined)
    *kind = branch32;
    targetAddress =
        fixupAddress + 4 + (int32_t) * (const little32_t *)fixupContent;
    // The target is looked up at reloc.value; the addend written by the
    // callback is then replaced by the distance from reloc.value.
    if (auto ec = atomFromAddress(0, reloc.value, target, addend))
      return ec;
    *addend = targetAddress - reloc.value;
    break;
  case GENERIC_RELOC_VANILLA | rPcRel | rExtern | rLength2:
    // ex: callw _foo (and _foo undefined)
    *kind = branch16;
    if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
      return ec;
    // Same as the 4-byte case, with a 2-byte displacement field.
    *addend = fixupAddress + 2 + (int16_t)*(const little16_t *)fixupContent;
    break;
  case GENERIC_RELOC_VANILLA | rPcRel | rLength2:
    // ex: callw _foo (and _foo defined)
    *kind = branch16;
    targetAddress =
        fixupAddress + 2 + (int16_t) * (const little16_t *)fixupContent;
    return atomFromAddress(reloc.symbol, targetAddress, target, addend);
    break;
  case GENERIC_RELOC_VANILLA | rScattered | rPcRel | rLength2:
    // ex: callw _foo+n (and _foo defined)
    *kind = branch16;
    targetAddress =
        fixupAddress + 2 + (int16_t) * (const little16_t *)fixupContent;
    if (auto ec = atomFromAddress(0, reloc.value, target, addend))
      return ec;
    *addend = targetAddress - reloc.value;
    break;
  case GENERIC_RELOC_VANILLA | rExtern | rLength4:
    // ex: movl _foo, %eax (and _foo undefined)
    // ex: .long _foo (and _foo undefined)
    // The kind depends on the containing atom's permissions: abs32 when the
    // atom has the permR_X bits set, pointer32 otherwise.
    perms = inAtom->permissions();
    *kind =
        ((perms & DefinedAtom::permR_X) == DefinedAtom::permR_X) ? abs32
                                                                 : pointer32;
    if (auto ec = atomFromSymbolIndex(reloc.symbol, target))
      return ec;
    *addend = *(const ulittle32_t *)fixupContent;
    break;
  case GENERIC_RELOC_VANILLA | rLength4:
    // ex: movl _foo, %eax (and _foo defined)
    // ex: .long _foo (and _foo defined)
    perms = inAtom->permissions();
    *kind =
        ((perms & DefinedAtom::permR_X) == DefinedAtom::permR_X) ? abs32
                                                                 : pointer32;
    // The stored 32-bit value is itself the target address.
    targetAddress = *(const ulittle32_t *)fixupContent;
    return atomFromAddress(reloc.symbol, targetAddress, target, addend);
    break;
  case GENERIC_RELOC_VANILLA | rScattered | rLength4:
    // ex: .long _foo+n (and _foo defined)
    perms = inAtom->permissions();
    *kind =
        ((perms & DefinedAtom::permR_X) == DefinedAtom::permR_X) ? abs32
                                                                 : pointer32;
    if (auto ec = atomFromAddress(0, reloc.value, target, addend))
      return ec;
    // Addend is the stored value's distance from reloc.value.
    *addend = *(const ulittle32_t *)fixupContent - reloc.value;
    break;
  default:
    return llvm::make_error<GenericError>("unsupported i386 relocation type");
  }
  return llvm::Error();
}
GENERIC_RELOC_PAIR | rScattered | rLength4): + toAddress = reloc1.value; + fromAddress = reloc2.value; + value = *(const little32_t *)fixupContent; + if (auto ec = atomFromAddr(0, toAddress, target, &offsetInTo)) + return ec; + if (auto ec = atomFromAddr(0, fromAddress, &fromTarget, &offsetInFrom)) + return ec; + if (fromTarget != inAtom) { + if (*target != inAtom) + return llvm::make_error<GenericError>( + "SECTDIFF relocation where neither target is in atom"); + *kind = negDelta32; + *addend = toAddress - value - fromAddress; + *target = fromTarget; + } else { + if ((perms & DefinedAtom::permR_X) == DefinedAtom::permR_X) { + // SECTDIFF relocations are used in i386 codegen where the function + // prolog does a CALL to the next instruction which POPs the return + // address into EBX which becomes the pic-base register. The POP + // instruction is label the used for the subtrahend in expressions. + // The funcRel32 kind represents the 32-bit delta to some symbol from + // the start of the function (atom) containing the funcRel32. + *kind = funcRel32; + uint32_t ta = fromAddress + value - toAddress; + *addend = ta - offsetInFrom; + } else { + *kind = delta32; + *addend = fromAddress + value - toAddress; + } + } + return llvm::Error(); + break; + default: + return llvm::make_error<GenericError>("unsupported i386 relocation type"); + } +} + +void ArchHandler_x86::generateAtomContent(const DefinedAtom &atom, + bool relocatable, + FindAddressForAtom findAddress, + FindAddressForAtom findSectionAddress, + uint64_t imageBaseAddress, + llvm::MutableArrayRef<uint8_t> atomContentBuffer) { + // Copy raw bytes. + std::copy(atom.rawContent().begin(), atom.rawContent().end(), + atomContentBuffer.begin()); + // Apply fix-ups. 
+ for (const Reference *ref : atom) { + uint32_t offset = ref->offsetInAtom(); + const Atom *target = ref->target(); + uint64_t targetAddress = 0; + if (isa<DefinedAtom>(target)) + targetAddress = findAddress(*target); + uint64_t atomAddress = findAddress(atom); + uint64_t fixupAddress = atomAddress + offset; + if (relocatable) { + applyFixupRelocatable(*ref, &atomContentBuffer[offset], + fixupAddress, targetAddress, + atomAddress); + } else { + applyFixupFinal(*ref, &atomContentBuffer[offset], + fixupAddress, targetAddress, + atomAddress); + } + } +} + +void ArchHandler_x86::applyFixupFinal(const Reference &ref, uint8_t *loc, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::x86); + ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc); + switch (static_cast<X86Kind>(ref.kindValue())) { + case branch32: + *loc32 = (targetAddress - (fixupAddress + 4)) + ref.addend(); + break; + case branch16: + *loc32 = (targetAddress - (fixupAddress + 2)) + ref.addend(); + break; + case pointer32: + case abs32: + *loc32 = targetAddress + ref.addend(); + break; + case funcRel32: + *loc32 = targetAddress - inAtomAddress + ref.addend(); + break; + case delta32: + *loc32 = targetAddress - fixupAddress + ref.addend(); + break; + case negDelta32: + *loc32 = fixupAddress - targetAddress + ref.addend(); + break; + case modeCode: + case modeData: + case lazyPointer: + // do nothing + break; + case lazyImmediateLocation: + *loc32 = ref.addend(); + break; + case invalid: + llvm_unreachable("invalid x86 Reference Kind"); + break; + } +} + +void ArchHandler_x86::applyFixupRelocatable(const Reference &ref, + uint8_t *loc, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::x86); + bool 
useExternalReloc = useExternalRelocationTo(*ref.target()); + ulittle16_t *loc16 = reinterpret_cast<ulittle16_t *>(loc); + ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc); + switch (static_cast<X86Kind>(ref.kindValue())) { + case branch32: + if (useExternalReloc) + *loc32 = ref.addend() - (fixupAddress + 4); + else + *loc32 =(targetAddress - (fixupAddress+4)) + ref.addend(); + break; + case branch16: + if (useExternalReloc) + *loc16 = ref.addend() - (fixupAddress + 2); + else + *loc16 = (targetAddress - (fixupAddress+2)) + ref.addend(); + break; + case pointer32: + case abs32: + *loc32 = targetAddress + ref.addend(); + break; + case funcRel32: + *loc32 = targetAddress - inAtomAddress + ref.addend(); // FIXME + break; + case delta32: + *loc32 = targetAddress - fixupAddress + ref.addend(); + break; + case negDelta32: + *loc32 = fixupAddress - targetAddress + ref.addend(); + break; + case modeCode: + case modeData: + case lazyPointer: + case lazyImmediateLocation: + // do nothing + break; + case invalid: + llvm_unreachable("invalid x86 Reference Kind"); + break; + } +} + +bool ArchHandler_x86::useExternalRelocationTo(const Atom &target) { + // Undefined symbols are referenced via external relocations. + if (isa<UndefinedAtom>(&target)) + return true; + if (const DefinedAtom *defAtom = dyn_cast<DefinedAtom>(&target)) { + switch (defAtom->merge()) { + case DefinedAtom::mergeAsTentative: + // Tentative definitions are referenced via external relocations. + return true; + case DefinedAtom::mergeAsWeak: + case DefinedAtom::mergeAsWeakAndAddressUsed: + // Global weak-defs are referenced via external relocations. + return (defAtom->scope() == DefinedAtom::scopeGlobal); + default: + break; + } + } + // Everything else is reference via an internal relocation. 
+ return false; +} + +void ArchHandler_x86::appendSectionRelocations( + const DefinedAtom &atom, + uint64_t atomSectionOffset, + const Reference &ref, + FindSymbolIndexForAtom symbolIndexForAtom, + FindSectionIndexForAtom sectionIndexForAtom, + FindAddressForAtom addressForAtom, + normalized::Relocations &relocs) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::x86); + uint32_t sectionOffset = atomSectionOffset + ref.offsetInAtom(); + bool useExternalReloc = useExternalRelocationTo(*ref.target()); + switch (static_cast<X86Kind>(ref.kindValue())) { + case modeCode: + case modeData: + break; + case branch32: + if (useExternalReloc) { + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + GENERIC_RELOC_VANILLA | rExtern | rPcRel | rLength4); + } else { + if (ref.addend() != 0) + appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), + GENERIC_RELOC_VANILLA | rScattered | rPcRel | rLength4); + else + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, + GENERIC_RELOC_VANILLA | rPcRel | rLength4); + } + break; + case branch16: + if (useExternalReloc) { + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + GENERIC_RELOC_VANILLA | rExtern | rPcRel | rLength2); + } else { + if (ref.addend() != 0) + appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), + GENERIC_RELOC_VANILLA | rScattered | rPcRel | rLength2); + else + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()),0, + GENERIC_RELOC_VANILLA | rPcRel | rLength2); + } + break; + case pointer32: + case abs32: + if (useExternalReloc) + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + GENERIC_RELOC_VANILLA | rExtern | rLength4); + else { + if (ref.addend() != 0) + appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), + GENERIC_RELOC_VANILLA | rScattered | rLength4); + else + appendReloc(relocs, 
sectionOffset, sectionIndexForAtom(*ref.target()), 0, + GENERIC_RELOC_VANILLA | rLength4); + } + break; + case funcRel32: + appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), + GENERIC_RELOC_SECTDIFF | rScattered | rLength4); + appendReloc(relocs, sectionOffset, 0, addressForAtom(atom) - ref.addend(), + GENERIC_RELOC_PAIR | rScattered | rLength4); + break; + case delta32: + appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), + GENERIC_RELOC_SECTDIFF | rScattered | rLength4); + appendReloc(relocs, sectionOffset, 0, addressForAtom(atom) + + ref.offsetInAtom(), + GENERIC_RELOC_PAIR | rScattered | rLength4); + break; + case negDelta32: + appendReloc(relocs, sectionOffset, 0, addressForAtom(atom) + + ref.offsetInAtom(), + GENERIC_RELOC_SECTDIFF | rScattered | rLength4); + appendReloc(relocs, sectionOffset, 0, addressForAtom(*ref.target()), + GENERIC_RELOC_PAIR | rScattered | rLength4); + break; + case lazyPointer: + case lazyImmediateLocation: + llvm_unreachable("lazy reference kind implies Stubs pass was run"); + break; + case invalid: + llvm_unreachable("unknown x86 Reference Kind"); + break; + } +} + +std::unique_ptr<mach_o::ArchHandler> ArchHandler::create_x86() { + return std::unique_ptr<mach_o::ArchHandler>(new ArchHandler_x86()); +} + +} // namespace mach_o +} // namespace lld diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_x86_64.cpp b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_x86_64.cpp new file mode 100644 index 00000000000..c36982a77b1 --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/ArchHandler_x86_64.cpp @@ -0,0 +1,858 @@ +//===- lib/FileFormat/MachO/ArchHandler_x86_64.cpp ------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
/// ArchHandler implementation for x86_64 mach-o files.
///
/// The GOT/TLV helpers and the small accessors are defined inline; the
/// relocation parsing, fixup application and relocation emission members are
/// only declared here and are defined out of line.
class ArchHandler_x86_64 : public ArchHandler {
public:
  ArchHandler_x86_64() = default;
  ~ArchHandler_x86_64() override = default;

  /// Returns the name table for the X86_64Kind values.
  const Registry::KindStrings *kindStrings() override { return _sKindStrings; }

  Reference::KindArch kindArch() override {
    return Reference::KindArch::x86_64;
  }

  /// Used by GOTPass to locate GOT References.
  /// Returns true for the three GOT-related kinds and sets \p canBypassGOT;
  /// only ripRel32GotLoad sets it to true. \p canBypassGOT is left untouched
  /// when the function returns false.
  bool isGOTAccess(const Reference &ref, bool &canBypassGOT) override {
    if (ref.kindNamespace() != Reference::KindNamespace::mach_o)
      return false;
    assert(ref.kindArch() == Reference::KindArch::x86_64);
    switch (ref.kindValue()) {
    case ripRel32GotLoad:
      canBypassGOT = true;
      return true;
    case ripRel32Got:
      canBypassGOT = false;
      return true;
    case imageOffsetGot:
      canBypassGOT = false;
      return true;
    default:
      return false;
    }
  }

  /// True when \p ref has the ripRel32Tlv kind.
  bool isTLVAccess(const Reference &ref) const override {
    assert(ref.kindNamespace() == Reference::KindNamespace::mach_o);
    assert(ref.kindArch() == Reference::KindArch::x86_64);
    return ref.kindValue() == ripRel32Tlv;
  }

  /// Rewrites a ripRel32Tlv reference to the ripRel32 kind.
  void updateReferenceToTLV(const Reference *ref) override {
    assert(ref->kindNamespace() == Reference::KindNamespace::mach_o);
    assert(ref->kindArch() == Reference::KindArch::x86_64);
    assert(ref->kindValue() == ripRel32Tlv);
    // The interface passes a const pointer, so constness is cast away in
    // order to change the kind in place.
    const_cast<Reference*>(ref)->setKindValue(ripRel32);
  }

  /// Used by GOTPass to update GOT References.
  /// \p targetNowGOT selects between ripRel32 and ripRel32GotLoadNowLea for
  /// the rip-relative kinds; imageOffsetGot always becomes imageOffset.
  void updateReferenceToGOT(const Reference *ref, bool targetNowGOT) override {
    assert(ref->kindNamespace() == Reference::KindNamespace::mach_o);
    assert(ref->kindArch() == Reference::KindArch::x86_64);

    switch (ref->kindValue()) {
    case ripRel32Got:
      assert(targetNowGOT && "target must be GOT");
      // No break here: control continues into the ripRel32GotLoad case.
    case ripRel32GotLoad:
      const_cast<Reference *>(ref)
        ->setKindValue(targetNowGOT ? ripRel32 : ripRel32GotLoadNowLea);
      break;
    case imageOffsetGot:
      const_cast<Reference *>(ref)->setKindValue(imageOffset);
      break;
    default:
      llvm_unreachable("unknown GOT reference kind");
    }
  }

  /// Always true for this handler.
  bool needsCompactUnwind() override {
    return true;
  }

  // Reference kinds this handler reports for the generic queries below.
  Reference::KindValue imageOffsetKind() override {
    return imageOffset;
  }

  Reference::KindValue imageOffsetKindIndirect() override {
    return imageOffsetGot;
  }

  Reference::KindValue unwindRefToPersonalityFunctionKind() override {
    return ripRel32Got;
  }

  Reference::KindValue unwindRefToCIEKind() override {
    return negDelta32;
  }

  Reference::KindValue unwindRefToFunctionKind() override{
    return unwindFDEToFunction;
  }

  Reference::KindValue unwindRefToEhFrameKind() override {
    return unwindInfoToEhFrame;
  }

  Reference::KindValue pointerKind() override {
    return pointer64;
  }

  uint32_t dwarfCompactUnwindType() override {
    return 0x04000000U;
  }

  /// Returns the static description of the x86_64 stub layout.
  const StubInfo &stubInfo() override { return _sStubInfo; }

  /// Always false for this handler.
  bool isNonCallBranch(const Reference &) override {
    return false;
  }

  bool isCallSite(const Reference &) override;
  bool isPointer(const Reference &) override;
  bool isPairedReloc(const normalized::Relocation &) override;

  /// Decodes a single relocation into a reference kind, target atom and
  /// addend (written through \p kind, \p target and \p addend).
  llvm::Error getReferenceInfo(const normalized::Relocation &reloc,
                               const DefinedAtom *inAtom,
                               uint32_t offsetInAtom,
                               uint64_t fixupAddress, bool swap,
                               FindAtomBySectionAndAddress atomFromAddress,
                               FindAtomBySymbolIndex atomFromSymbolIndex,
                               Reference::KindValue *kind,
                               const lld::Atom **target,
                               Reference::Addend *addend) override;
  /// Decodes a pair of relocations (\p reloc1 followed by \p reloc2) into a
  /// single reference kind, target atom and addend.
  llvm::Error
      getPairReferenceInfo(const normalized::Relocation &reloc1,
                           const normalized::Relocation &reloc2,
                           const DefinedAtom *inAtom,
                           uint32_t offsetInAtom,
                           uint64_t fixupAddress, bool swap, bool scatterable,
                           FindAtomBySectionAndAddress atomFromAddress,
                           FindAtomBySymbolIndex atomFromSymbolIndex,
                           Reference::KindValue *kind,
                           const lld::Atom **target,
                           Reference::Addend *addend) override;

  /// True when \p atom's content type is typeCString.
  bool needsLocalSymbolInRelocatableFile(const DefinedAtom *atom) override {
    return (atom->contentType() == DefinedAtom::typeCString);
  }

  /// Writes the content of \p atom, with its references applied, into
  /// \p atomContentBuffer.
  void generateAtomContent(const DefinedAtom &atom, bool relocatable,
                           FindAddressForAtom findAddress,
                           FindAddressForAtom findSectionAddress,
                           uint64_t imageBase,
                    llvm::MutableArrayRef<uint8_t> atomContentBuffer) override;

  /// Appends to \p relocs the relocation record(s) describing \p ref.
  void appendSectionRelocations(const DefinedAtom &atom,
                                uint64_t atomSectionOffset,
                                const Reference &ref,
                                FindSymbolIndexForAtom symbolIndexForAtom,
                                FindSectionIndexForAtom sectionIndexForAtom,
                                FindAddressForAtom addressForAtom,
                                normalized::Relocations &relocs) override;

private:
  static const Registry::KindStrings _sKindStrings[];
  static const StubInfo              _sStubInfo;

  /// Reference kinds used by this handler.
  enum X86_64Kind: Reference::KindValue {
    invalid,               /// for error condition

    // Kinds found in mach-o .o files:
    branch32,              /// ex: call _foo
    ripRel32,              /// ex: movq _foo(%rip), %rax
    ripRel32Minus1,        /// ex: movb $0x12, _foo(%rip)
    ripRel32Minus2,        /// ex: movw $0x1234, _foo(%rip)
    ripRel32Minus4,        /// ex: movl $0x12345678, _foo(%rip)
    ripRel32Anon,          /// ex: movq L1(%rip), %rax
    ripRel32Minus1Anon,    /// ex: movb $0x12, L1(%rip)
    ripRel32Minus2Anon,    /// ex: movw $0x1234, L1(%rip)
    ripRel32Minus4Anon,    /// ex: movl $0x12345678, L1(%rip)
    ripRel32GotLoad,       /// ex: movq  _foo@GOTPCREL(%rip), %rax
    ripRel32Got,           /// ex: pushq _foo@GOTPCREL(%rip)
    ripRel32Tlv,           /// ex: movq  _foo@TLVP(%rip), %rdi
    pointer64,             /// ex: .quad _foo
    pointer64Anon,         /// ex: .quad L1
    delta64,               /// ex: .quad _foo - .
    delta32,               /// ex: .long _foo - .
    delta64Anon,           /// ex: .quad L1 - .
    delta32Anon,           /// ex: .long L1 - .
    negDelta64,            /// ex: .quad . - _foo
    negDelta32,            /// ex: .long . - _foo

    // Kinds introduced by Passes:
    ripRel32GotLoadNowLea, /// Target of GOT load is in linkage unit so
                           ///  "movq  _foo@GOTPCREL(%rip), %rax" can be changed
                           /// to "leaq _foo(%rip), %rax"
    lazyPointer,           /// Location contains a lazy pointer.
    lazyImmediateLocation, /// Location contains immediate value used in stub.

    imageOffset,           /// Location contains offset of atom in final image
    imageOffsetGot,        /// Location contains offset of GOT entry for atom in
                           /// final image (typically personality function).
    unwindFDEToFunction,   /// Nearly delta64, but cannot be rematerialized in
                           /// relocatable object (yay for implicit contracts!).
    unwindInfoToEhFrame,   /// Fix low 24 bits of compact unwind encoding to
                           /// refer to __eh_frame entry.
    tlvInitSectionOffset   /// Location contains offset tlv init-value atom
                           /// within the __thread_data section.
  };

  /// Maps a relocation's pattern to an X86_64Kind, or `invalid` when the
  /// pattern is not recognized.
  Reference::KindValue kindFromReloc(const normalized::Relocation &reloc);

  /// Applies \p ref at \p location; presumably the non-relocatable
  /// counterpart of applyFixupRelocatable() — the caller is not visible here.
  void applyFixupFinal(const Reference &ref, uint8_t *location,
                       uint64_t fixupAddress, uint64_t targetAddress,
                       uint64_t inAtomAddress, uint64_t imageBaseAddress,
                       FindAddressForAtom findSectionAddress);

  /// Applies \p ref at \p location; presumably used when producing
  /// relocatable output — the caller is not visible here.
  void applyFixupRelocatable(const Reference &ref, uint8_t *location,
                             uint64_t fixupAddress,
                             uint64_t targetAddress,
                             uint64_t inAtomAddress);
};
0, 0 }, + + // x86_64 code alignment 2^1 + 1, + + // Stub size and code + 6, + { 0xff, 0x25, 0x00, 0x00, 0x00, 0x00 }, // jmp *lazyPointer + { Reference::KindArch::x86_64, ripRel32, 2, 0 }, + { false, 0, 0, 0 }, + + // Stub Helper size and code + 10, + { 0x68, 0x00, 0x00, 0x00, 0x00, // pushq $lazy-info-offset + 0xE9, 0x00, 0x00, 0x00, 0x00 }, // jmp helperhelper + { Reference::KindArch::x86_64, lazyImmediateLocation, 1, 0 }, + { Reference::KindArch::x86_64, branch32, 6, 0 }, + + // Stub helper image cache content type + DefinedAtom::typeNonLazyPointer, + + // Stub Helper-Common size and code + 16, + // Stub helper alignment + 2, + { 0x4C, 0x8D, 0x1D, 0x00, 0x00, 0x00, 0x00, // leaq cache(%rip),%r11 + 0x41, 0x53, // push %r11 + 0xFF, 0x25, 0x00, 0x00, 0x00, 0x00, // jmp *binder(%rip) + 0x90 }, // nop + { Reference::KindArch::x86_64, ripRel32, 3, 0 }, + { false, 0, 0, 0 }, + { Reference::KindArch::x86_64, ripRel32, 11, 0 }, + { false, 0, 0, 0 } + +}; + +bool ArchHandler_x86_64::isCallSite(const Reference &ref) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return false; + assert(ref.kindArch() == Reference::KindArch::x86_64); + return (ref.kindValue() == branch32); +} + +bool ArchHandler_x86_64::isPointer(const Reference &ref) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return false; + assert(ref.kindArch() == Reference::KindArch::x86_64); + Reference::KindValue kind = ref.kindValue(); + return (kind == pointer64 || kind == pointer64Anon); +} + +bool ArchHandler_x86_64::isPairedReloc(const Relocation &reloc) { + return (reloc.type == X86_64_RELOC_SUBTRACTOR); +} + +Reference::KindValue +ArchHandler_x86_64::kindFromReloc(const Relocation &reloc) { + switch(relocPattern(reloc)) { + case X86_64_RELOC_BRANCH | rPcRel | rExtern | rLength4: + return branch32; + case X86_64_RELOC_SIGNED | rPcRel | rExtern | rLength4: + return ripRel32; + case X86_64_RELOC_SIGNED | rPcRel | rLength4: + return ripRel32Anon; + case 
X86_64_RELOC_SIGNED_1 | rPcRel | rExtern | rLength4: + return ripRel32Minus1; + case X86_64_RELOC_SIGNED_1 | rPcRel | rLength4: + return ripRel32Minus1Anon; + case X86_64_RELOC_SIGNED_2 | rPcRel | rExtern | rLength4: + return ripRel32Minus2; + case X86_64_RELOC_SIGNED_2 | rPcRel | rLength4: + return ripRel32Minus2Anon; + case X86_64_RELOC_SIGNED_4 | rPcRel | rExtern | rLength4: + return ripRel32Minus4; + case X86_64_RELOC_SIGNED_4 | rPcRel | rLength4: + return ripRel32Minus4Anon; + case X86_64_RELOC_GOT_LOAD | rPcRel | rExtern | rLength4: + return ripRel32GotLoad; + case X86_64_RELOC_GOT | rPcRel | rExtern | rLength4: + return ripRel32Got; + case X86_64_RELOC_TLV | rPcRel | rExtern | rLength4: + return ripRel32Tlv; + case X86_64_RELOC_UNSIGNED | rExtern | rLength8: + return pointer64; + case X86_64_RELOC_UNSIGNED | rLength8: + return pointer64Anon; + default: + return invalid; + } +} + +llvm::Error +ArchHandler_x86_64::getReferenceInfo(const Relocation &reloc, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool swap, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) { + *kind = kindFromReloc(reloc); + if (*kind == invalid) + return llvm::make_error<GenericError>("unknown type"); + const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; + uint64_t targetAddress; + switch (*kind) { + case branch32: + case ripRel32: + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = *(const little32_t *)fixupContent; + return llvm::Error(); + case ripRel32Minus1: + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = (int32_t)*(const little32_t *)fixupContent + 1; + return llvm::Error(); + case ripRel32Minus2: + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = (int32_t)*(const little32_t *)fixupContent + 2; + return 
llvm::Error(); + case ripRel32Minus4: + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = (int32_t)*(const little32_t *)fixupContent + 4; + return llvm::Error(); + case ripRel32Anon: + targetAddress = fixupAddress + 4 + *(const little32_t *)fixupContent; + return atomFromAddress(reloc.symbol, targetAddress, target, addend); + case ripRel32Minus1Anon: + targetAddress = fixupAddress + 5 + *(const little32_t *)fixupContent; + return atomFromAddress(reloc.symbol, targetAddress, target, addend); + case ripRel32Minus2Anon: + targetAddress = fixupAddress + 6 + *(const little32_t *)fixupContent; + return atomFromAddress(reloc.symbol, targetAddress, target, addend); + case ripRel32Minus4Anon: + targetAddress = fixupAddress + 8 + *(const little32_t *)fixupContent; + return atomFromAddress(reloc.symbol, targetAddress, target, addend); + case ripRel32GotLoad: + case ripRel32Got: + case ripRel32Tlv: + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + *addend = *(const little32_t *)fixupContent; + return llvm::Error(); + case tlvInitSectionOffset: + case pointer64: + if (auto ec = atomFromSymbolIndex(reloc.symbol, target)) + return ec; + // If this is the 3rd pointer of a tlv-thunk (i.e. the pointer to the TLV's + // initial value) we need to handle it specially. 
+ if (inAtom->contentType() == DefinedAtom::typeThunkTLV && + offsetInAtom == 16) { + *kind = tlvInitSectionOffset; + assert(*addend == 0 && "TLV-init has non-zero addend?"); + } else + *addend = *(const little64_t *)fixupContent; + return llvm::Error(); + case pointer64Anon: + targetAddress = *(const little64_t *)fixupContent; + return atomFromAddress(reloc.symbol, targetAddress, target, addend); + default: + llvm_unreachable("bad reloc kind"); + } +} + +llvm::Error +ArchHandler_x86_64::getPairReferenceInfo(const normalized::Relocation &reloc1, + const normalized::Relocation &reloc2, + const DefinedAtom *inAtom, + uint32_t offsetInAtom, + uint64_t fixupAddress, bool swap, + bool scatterable, + FindAtomBySectionAndAddress atomFromAddress, + FindAtomBySymbolIndex atomFromSymbolIndex, + Reference::KindValue *kind, + const lld::Atom **target, + Reference::Addend *addend) { + const uint8_t *fixupContent = &inAtom->rawContent()[offsetInAtom]; + uint64_t targetAddress; + const lld::Atom *fromTarget; + if (auto ec = atomFromSymbolIndex(reloc1.symbol, &fromTarget)) + return ec; + + switch(relocPattern(reloc1) << 16 | relocPattern(reloc2)) { + case ((X86_64_RELOC_SUBTRACTOR | rExtern | rLength8) << 16 | + X86_64_RELOC_UNSIGNED | rExtern | rLength8): { + if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) + return ec; + uint64_t encodedAddend = (int64_t)*(const little64_t *)fixupContent; + if (inAtom == fromTarget) { + *kind = delta64; + *addend = encodedAddend + offsetInAtom; + } else if (inAtom == *target) { + *kind = negDelta64; + *addend = encodedAddend - offsetInAtom; + *target = fromTarget; + } else + return llvm::make_error<GenericError>("Invalid pointer diff"); + return llvm::Error(); + } + case ((X86_64_RELOC_SUBTRACTOR | rExtern | rLength4) << 16 | + X86_64_RELOC_UNSIGNED | rExtern | rLength4): { + if (auto ec = atomFromSymbolIndex(reloc2.symbol, target)) + return ec; + uint32_t encodedAddend = (int32_t)*(const little32_t *)fixupContent; + if (inAtom == 
fromTarget) { + *kind = delta32; + *addend = encodedAddend + offsetInAtom; + } else if (inAtom == *target) { + *kind = negDelta32; + *addend = encodedAddend - offsetInAtom; + *target = fromTarget; + } else + return llvm::make_error<GenericError>("Invalid pointer diff"); + return llvm::Error(); + } + case ((X86_64_RELOC_SUBTRACTOR | rExtern | rLength8) << 16 | + X86_64_RELOC_UNSIGNED | rLength8): + if (fromTarget != inAtom) + return llvm::make_error<GenericError>("pointer diff not in base atom"); + *kind = delta64Anon; + targetAddress = offsetInAtom + (int64_t)*(const little64_t *)fixupContent; + return atomFromAddress(reloc2.symbol, targetAddress, target, addend); + case ((X86_64_RELOC_SUBTRACTOR | rExtern | rLength4) << 16 | + X86_64_RELOC_UNSIGNED | rLength4): + if (fromTarget != inAtom) + return llvm::make_error<GenericError>("pointer diff not in base atom"); + *kind = delta32Anon; + targetAddress = offsetInAtom + (int32_t)*(const little32_t *)fixupContent; + return atomFromAddress(reloc2.symbol, targetAddress, target, addend); + default: + return llvm::make_error<GenericError>("unknown pair"); + } +} + +void ArchHandler_x86_64::generateAtomContent( + const DefinedAtom &atom, bool relocatable, FindAddressForAtom findAddress, + FindAddressForAtom findSectionAddress, uint64_t imageBaseAddress, + llvm::MutableArrayRef<uint8_t> atomContentBuffer) { + // Copy raw bytes. + std::copy(atom.rawContent().begin(), atom.rawContent().end(), + atomContentBuffer.begin()); + // Apply fix-ups. 
+ for (const Reference *ref : atom) { + uint32_t offset = ref->offsetInAtom(); + const Atom *target = ref->target(); + uint64_t targetAddress = 0; + if (isa<DefinedAtom>(target)) + targetAddress = findAddress(*target); + uint64_t atomAddress = findAddress(atom); + uint64_t fixupAddress = atomAddress + offset; + if (relocatable) { + applyFixupRelocatable(*ref, &atomContentBuffer[offset], + fixupAddress, targetAddress, + atomAddress); + } else { + applyFixupFinal(*ref, &atomContentBuffer[offset], + fixupAddress, targetAddress, + atomAddress, imageBaseAddress, findSectionAddress); + } + } +} + +void ArchHandler_x86_64::applyFixupFinal( + const Reference &ref, uint8_t *loc, uint64_t fixupAddress, + uint64_t targetAddress, uint64_t inAtomAddress, uint64_t imageBaseAddress, + FindAddressForAtom findSectionAddress) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::x86_64); + ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc); + ulittle64_t *loc64 = reinterpret_cast<ulittle64_t *>(loc); + switch (static_cast<X86_64Kind>(ref.kindValue())) { + case branch32: + case ripRel32: + case ripRel32Anon: + case ripRel32Got: + case ripRel32GotLoad: + case ripRel32Tlv: + *loc32 = targetAddress - (fixupAddress + 4) + ref.addend(); + return; + case pointer64: + case pointer64Anon: + *loc64 = targetAddress + ref.addend(); + return; + case tlvInitSectionOffset: + *loc64 = targetAddress - findSectionAddress(*ref.target()) + ref.addend(); + return; + case ripRel32Minus1: + case ripRel32Minus1Anon: + *loc32 = targetAddress - (fixupAddress + 5) + ref.addend(); + return; + case ripRel32Minus2: + case ripRel32Minus2Anon: + *loc32 = targetAddress - (fixupAddress + 6) + ref.addend(); + return; + case ripRel32Minus4: + case ripRel32Minus4Anon: + *loc32 = targetAddress - (fixupAddress + 8) + ref.addend(); + return; + case delta32: + case delta32Anon: + *loc32 = targetAddress - fixupAddress + ref.addend(); + return; + 
case delta64: + case delta64Anon: + case unwindFDEToFunction: + *loc64 = targetAddress - fixupAddress + ref.addend(); + return; + case ripRel32GotLoadNowLea: + // Change MOVQ to LEA + assert(loc[-2] == 0x8B); + loc[-2] = 0x8D; + *loc32 = targetAddress - (fixupAddress + 4) + ref.addend(); + return; + case negDelta64: + *loc64 = fixupAddress - targetAddress + ref.addend(); + return; + case negDelta32: + *loc32 = fixupAddress - targetAddress + ref.addend(); + return; + case lazyPointer: + // Do nothing + return; + case lazyImmediateLocation: + *loc32 = ref.addend(); + return; + case imageOffset: + case imageOffsetGot: + *loc32 = (targetAddress - imageBaseAddress) + ref.addend(); + return; + case unwindInfoToEhFrame: { + uint64_t val = targetAddress - findSectionAddress(*ref.target()) + ref.addend(); + assert(val < 0xffffffU && "offset in __eh_frame too large"); + *loc32 = (*loc32 & 0xff000000U) | val; + return; + } + case invalid: + // Fall into llvm_unreachable(). + break; + } + llvm_unreachable("invalid x86_64 Reference Kind"); +} + +void ArchHandler_x86_64::applyFixupRelocatable(const Reference &ref, + uint8_t *loc, + uint64_t fixupAddress, + uint64_t targetAddress, + uint64_t inAtomAddress) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::x86_64); + ulittle32_t *loc32 = reinterpret_cast<ulittle32_t *>(loc); + ulittle64_t *loc64 = reinterpret_cast<ulittle64_t *>(loc); + switch (static_cast<X86_64Kind>(ref.kindValue())) { + case branch32: + case ripRel32: + case ripRel32Got: + case ripRel32GotLoad: + case ripRel32Tlv: + *loc32 = ref.addend(); + return; + case ripRel32Anon: + *loc32 = (targetAddress - (fixupAddress + 4)) + ref.addend(); + return; + case tlvInitSectionOffset: + case pointer64: + *loc64 = ref.addend(); + return; + case pointer64Anon: + *loc64 = targetAddress + ref.addend(); + return; + case ripRel32Minus1: + *loc32 = ref.addend() - 1; + return; + case ripRel32Minus1Anon: + *loc32 
= (targetAddress - (fixupAddress + 5)) + ref.addend(); + return; + case ripRel32Minus2: + *loc32 = ref.addend() - 2; + return; + case ripRel32Minus2Anon: + *loc32 = (targetAddress - (fixupAddress + 6)) + ref.addend(); + return; + case ripRel32Minus4: + *loc32 = ref.addend() - 4; + return; + case ripRel32Minus4Anon: + *loc32 = (targetAddress - (fixupAddress + 8)) + ref.addend(); + return; + case delta32: + *loc32 = ref.addend() + inAtomAddress - fixupAddress; + return; + case delta32Anon: + // The value we write here should be the the delta to the target + // after taking in to account the difference from the fixup back to the + // last defined label + // ie, if we have: + // _base: ... + // Lfixup: .quad Ltarget - . + // ... + // Ltarget: + // + // Then we want to encode the value (Ltarget + addend) - (LFixup - _base) + *loc32 = (targetAddress + ref.addend()) - (fixupAddress - inAtomAddress); + return; + case delta64: + *loc64 = ref.addend() + inAtomAddress - fixupAddress; + return; + case delta64Anon: + // The value we write here should be the the delta to the target + // after taking in to account the difference from the fixup back to the + // last defined label + // ie, if we have: + // _base: ... + // Lfixup: .quad Ltarget - . + // ... 
+ // Ltarget: + // + // Then we want to encode the value (Ltarget + addend) - (LFixup - _base) + *loc64 = (targetAddress + ref.addend()) - (fixupAddress - inAtomAddress); + return; + case negDelta64: + *loc64 = ref.addend() + fixupAddress - inAtomAddress; + return; + case negDelta32: + *loc32 = ref.addend() + fixupAddress - inAtomAddress; + return; + case ripRel32GotLoadNowLea: + llvm_unreachable("ripRel32GotLoadNowLea implies GOT pass was run"); + return; + case lazyPointer: + case lazyImmediateLocation: + llvm_unreachable("lazy reference kind implies Stubs pass was run"); + return; + case imageOffset: + case imageOffsetGot: + case unwindInfoToEhFrame: + llvm_unreachable("fixup implies __unwind_info"); + return; + case unwindFDEToFunction: + // Do nothing for now + return; + case invalid: + // Fall into llvm_unreachable(). + break; + } + llvm_unreachable("unknown x86_64 Reference Kind"); +} + +void ArchHandler_x86_64::appendSectionRelocations( + const DefinedAtom &atom, + uint64_t atomSectionOffset, + const Reference &ref, + FindSymbolIndexForAtom symbolIndexForAtom, + FindSectionIndexForAtom sectionIndexForAtom, + FindAddressForAtom addressForAtom, + normalized::Relocations &relocs) { + if (ref.kindNamespace() != Reference::KindNamespace::mach_o) + return; + assert(ref.kindArch() == Reference::KindArch::x86_64); + uint32_t sectionOffset = atomSectionOffset + ref.offsetInAtom(); + switch (static_cast<X86_64Kind>(ref.kindValue())) { + case branch32: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_BRANCH | rPcRel | rExtern | rLength4); + return; + case ripRel32: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SIGNED | rPcRel | rExtern | rLength4 ); + return; + case ripRel32Anon: + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SIGNED | rPcRel | rLength4 ); + return; + case ripRel32Got: + appendReloc(relocs, sectionOffset, 
symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_GOT | rPcRel | rExtern | rLength4 ); + return; + case ripRel32GotLoad: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_GOT_LOAD | rPcRel | rExtern | rLength4 ); + return; + case ripRel32Tlv: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_TLV | rPcRel | rExtern | rLength4 ); + return; + case tlvInitSectionOffset: + case pointer64: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_UNSIGNED | rExtern | rLength8); + return; + case pointer64Anon: + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, + X86_64_RELOC_UNSIGNED | rLength8); + return; + case ripRel32Minus1: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SIGNED_1 | rPcRel | rExtern | rLength4 ); + return; + case ripRel32Minus1Anon: + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SIGNED_1 | rPcRel | rLength4 ); + return; + case ripRel32Minus2: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SIGNED_2 | rPcRel | rExtern | rLength4 ); + return; + case ripRel32Minus2Anon: + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SIGNED_2 | rPcRel | rLength4 ); + return; + case ripRel32Minus4: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SIGNED_4 | rPcRel | rExtern | rLength4 ); + return; + case ripRel32Minus4Anon: + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SIGNED_4 | rPcRel | rLength4 ); + return; + case delta32: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, + X86_64_RELOC_SUBTRACTOR | rExtern | rLength4 ); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_UNSIGNED | rExtern | rLength4 ); + return; + case 
delta32Anon: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, + X86_64_RELOC_SUBTRACTOR | rExtern | rLength4 ); + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, + X86_64_RELOC_UNSIGNED | rLength4 ); + return; + case delta64: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, + X86_64_RELOC_SUBTRACTOR | rExtern | rLength8 ); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_UNSIGNED | rExtern | rLength8 ); + return; + case delta64Anon: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, + X86_64_RELOC_SUBTRACTOR | rExtern | rLength8 ); + appendReloc(relocs, sectionOffset, sectionIndexForAtom(*ref.target()), 0, + X86_64_RELOC_UNSIGNED | rLength8 ); + return; + case unwindFDEToFunction: + case unwindInfoToEhFrame: + return; + case negDelta32: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SUBTRACTOR | rExtern | rLength4 ); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, + X86_64_RELOC_UNSIGNED | rExtern | rLength4 ); + return; + case negDelta64: + appendReloc(relocs, sectionOffset, symbolIndexForAtom(*ref.target()), 0, + X86_64_RELOC_SUBTRACTOR | rExtern | rLength8 ); + appendReloc(relocs, sectionOffset, symbolIndexForAtom(atom), 0, + X86_64_RELOC_UNSIGNED | rExtern | rLength8 ); + return; + case ripRel32GotLoadNowLea: + llvm_unreachable("ripRel32GotLoadNowLea implies GOT pass was run"); + return; + case lazyPointer: + case lazyImmediateLocation: + llvm_unreachable("lazy reference kind implies Stubs pass was run"); + return; + case imageOffset: + case imageOffsetGot: + llvm_unreachable("__unwind_info references should have been resolved"); + return; + case invalid: + // Fall into llvm_unreachable(). 
+ break; + } + llvm_unreachable("unknown x86_64 Reference Kind"); +} + +std::unique_ptr<mach_o::ArchHandler> ArchHandler::create_x86_64() { + return std::unique_ptr<mach_o::ArchHandler>(new ArchHandler_x86_64()); +} + +} // namespace mach_o +} // namespace lld diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/Atoms.h b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/Atoms.h new file mode 100644 index 00000000000..573efca9f6f --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/Atoms.h @@ -0,0 +1,181 @@ +//===- lib/ReaderWriter/MachO/Atoms.h ---------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_ATOMS_H +#define LLD_READER_WRITER_MACHO_ATOMS_H + +#include "lld/Core/Atom.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/SharedLibraryAtom.h" +#include "lld/Core/Simple.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include <cstdint> +#include <string> + +namespace lld { + +class File; + +namespace mach_o { + +class MachODefinedAtom : public SimpleDefinedAtom { +public: + MachODefinedAtom(const File &f, const StringRef name, Scope scope, + ContentType type, Merge merge, bool thumb, bool noDeadStrip, + const ArrayRef<uint8_t> content, Alignment align) + : SimpleDefinedAtom(f), _name(name), _content(content), + _align(align), _contentType(type), _scope(scope), _merge(merge), + _thumb(thumb), _noDeadStrip(noDeadStrip) {} + + // Constructor for zero-fill content + MachODefinedAtom(const File &f, const StringRef name, Scope scope, + ContentType type, uint64_t size, bool noDeadStrip, + Alignment align) + : SimpleDefinedAtom(f), _name(name), + _content(ArrayRef<uint8_t>(nullptr, size)), _align(align), + _contentType(type), _scope(scope), _merge(mergeNo), _thumb(false), + 
_noDeadStrip(noDeadStrip) {} + + ~MachODefinedAtom() override = default; + + uint64_t size() const override { return _content.size(); } + + ContentType contentType() const override { return _contentType; } + + Alignment alignment() const override { return _align; } + + StringRef name() const override { return _name; } + + Scope scope() const override { return _scope; } + + Merge merge() const override { return _merge; } + + DeadStripKind deadStrip() const override { + if (_contentType == DefinedAtom::typeInitializerPtr) + return deadStripNever; + if (_contentType == DefinedAtom::typeTerminatorPtr) + return deadStripNever; + if (_noDeadStrip) + return deadStripNever; + return deadStripNormal; + } + + ArrayRef<uint8_t> rawContent() const override { + // Note: Zerofill atoms have a content pointer which is null. + return _content; + } + + bool isThumb() const { return _thumb; } + +private: + const StringRef _name; + const ArrayRef<uint8_t> _content; + const DefinedAtom::Alignment _align; + const ContentType _contentType; + const Scope _scope; + const Merge _merge; + const bool _thumb; + const bool _noDeadStrip; +}; + +class MachODefinedCustomSectionAtom : public MachODefinedAtom { +public: + MachODefinedCustomSectionAtom(const File &f, const StringRef name, + Scope scope, ContentType type, Merge merge, + bool thumb, bool noDeadStrip, + const ArrayRef<uint8_t> content, + StringRef sectionName, Alignment align) + : MachODefinedAtom(f, name, scope, type, merge, thumb, noDeadStrip, + content, align), + _sectionName(sectionName) {} + + ~MachODefinedCustomSectionAtom() override = default; + + SectionChoice sectionChoice() const override { + return DefinedAtom::sectionCustomRequired; + } + + StringRef customSectionName() const override { + return _sectionName; + } +private: + StringRef _sectionName; +}; + +class MachOTentativeDefAtom : public SimpleDefinedAtom { +public: + MachOTentativeDefAtom(const File &f, const StringRef name, Scope scope, + uint64_t size, 
DefinedAtom::Alignment align) + : SimpleDefinedAtom(f), _name(name), _scope(scope), _size(size), + _align(align) {} + + ~MachOTentativeDefAtom() override = default; + + uint64_t size() const override { return _size; } + + Merge merge() const override { return DefinedAtom::mergeAsTentative; } + + ContentType contentType() const override { return DefinedAtom::typeZeroFill; } + + Alignment alignment() const override { return _align; } + + StringRef name() const override { return _name; } + + Scope scope() const override { return _scope; } + + ArrayRef<uint8_t> rawContent() const override { return ArrayRef<uint8_t>(); } + +private: + const std::string _name; + const Scope _scope; + const uint64_t _size; + const DefinedAtom::Alignment _align; +}; + +class MachOSharedLibraryAtom : public SharedLibraryAtom { +public: + MachOSharedLibraryAtom(const File &file, StringRef name, + StringRef dylibInstallName, bool weakDef) + : SharedLibraryAtom(), _file(file), _name(name), + _dylibInstallName(dylibInstallName) {} + ~MachOSharedLibraryAtom() override = default; + + StringRef loadName() const override { return _dylibInstallName; } + + bool canBeNullAtRuntime() const override { + // FIXME: this may actually be changeable. For now, all symbols are strongly + // defined though. + return false; + } + + const File &file() const override { return _file; } + + StringRef name() const override { return _name; } + + Type type() const override { + // Unused in MachO (I think). 
+ return Type::Unknown; + } + + uint64_t size() const override { + // Unused in MachO (I think) + return 0; + } + +private: + const File &_file; + StringRef _name; + StringRef _dylibInstallName; +}; + +} // end namespace mach_o +} // end namespace lld + +#endif // LLD_READER_WRITER_MACHO_ATOMS_H diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/CMakeLists.txt b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/CMakeLists.txt new file mode 100644 index 00000000000..70f451c997b --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/CMakeLists.txt @@ -0,0 +1,29 @@ +add_lld_library(lldMachO + ArchHandler.cpp + ArchHandler_arm.cpp + ArchHandler_arm64.cpp + ArchHandler_x86.cpp + ArchHandler_x86_64.cpp + CompactUnwindPass.cpp + GOTPass.cpp + LayoutPass.cpp + MachOLinkingContext.cpp + MachONormalizedFileBinaryReader.cpp + MachONormalizedFileBinaryWriter.cpp + MachONormalizedFileFromAtoms.cpp + MachONormalizedFileToAtoms.cpp + MachONormalizedFileYAML.cpp + ObjCPass.cpp + ShimPass.cpp + StubsPass.cpp + TLVPass.cpp + WriterMachO.cpp + LINK_LIBS + lldCore + lldYAML + LLVMObject + LLVMSupport + ${PTHREAD_LIB} + ) + +include_directories(.) diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/CompactUnwindPass.cpp b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/CompactUnwindPass.cpp new file mode 100644 index 00000000000..6f5ab83dbda --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/CompactUnwindPass.cpp @@ -0,0 +1,582 @@ +//===- lib/ReaderWriter/MachO/CompactUnwindPass.cpp -------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file A pass to convert MachO's __compact_unwind sections into the final +/// __unwind_info format used during runtime. See +/// mach-o/compact_unwind_encoding.h for more details on the formats involved. 
+/// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "File.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "MachOPasses.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/Reference.h" +#include "lld/Core/Simple.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" + +#define DEBUG_TYPE "macho-compact-unwind" + +namespace lld { +namespace mach_o { + +namespace { +struct CompactUnwindEntry { + const Atom *rangeStart; + const Atom *personalityFunction; + const Atom *lsdaLocation; + const Atom *ehFrame; + + uint32_t rangeLength; + + // There are 3 types of compact unwind entry, distinguished by the encoding + // value: 0 indicates a function with no unwind info; + // _archHandler.dwarfCompactUnwindType() indicates that the entry defers to + // __eh_frame, and that the ehFrame entry will be valid; any other value is a + // real compact unwind entry -- personalityFunction will be set and + // lsdaLocation may be. 
+ uint32_t encoding; + + CompactUnwindEntry(const DefinedAtom *function) + : rangeStart(function), personalityFunction(nullptr), + lsdaLocation(nullptr), ehFrame(nullptr), rangeLength(function->size()), + encoding(0) {} + + CompactUnwindEntry() + : rangeStart(nullptr), personalityFunction(nullptr), + lsdaLocation(nullptr), ehFrame(nullptr), rangeLength(0), encoding(0) {} +}; + +struct UnwindInfoPage { + ArrayRef<CompactUnwindEntry> entries; +}; +} + +class UnwindInfoAtom : public SimpleDefinedAtom { +public: + UnwindInfoAtom(ArchHandler &archHandler, const File &file, bool isBig, + std::vector<const Atom *> &personalities, + std::vector<uint32_t> &commonEncodings, + std::vector<UnwindInfoPage> &pages, uint32_t numLSDAs) + : SimpleDefinedAtom(file), _archHandler(archHandler), + _commonEncodingsOffset(7 * sizeof(uint32_t)), + _personalityArrayOffset(_commonEncodingsOffset + + commonEncodings.size() * sizeof(uint32_t)), + _topLevelIndexOffset(_personalityArrayOffset + + personalities.size() * sizeof(uint32_t)), + _lsdaIndexOffset(_topLevelIndexOffset + + 3 * (pages.size() + 1) * sizeof(uint32_t)), + _firstPageOffset(_lsdaIndexOffset + 2 * numLSDAs * sizeof(uint32_t)), + _isBig(isBig) { + + addHeader(commonEncodings.size(), personalities.size(), pages.size()); + addCommonEncodings(commonEncodings); + addPersonalityFunctions(personalities); + addTopLevelIndexes(pages); + addLSDAIndexes(pages, numLSDAs); + addSecondLevelPages(pages); + } + + ~UnwindInfoAtom() override = default; + + ContentType contentType() const override { + return DefinedAtom::typeProcessedUnwindInfo; + } + + Alignment alignment() const override { return 4; } + + uint64_t size() const override { return _contents.size(); } + + ContentPermissions permissions() const override { + return DefinedAtom::permR__; + } + + ArrayRef<uint8_t> rawContent() const override { return _contents; } + + void addHeader(uint32_t numCommon, uint32_t numPersonalities, + uint32_t numPages) { + using normalized::write32; + + 
uint32_t headerSize = 7 * sizeof(uint32_t); + _contents.resize(headerSize); + + uint8_t *headerEntries = _contents.data(); + // version + write32(headerEntries, 1, _isBig); + // commonEncodingsArraySectionOffset + write32(headerEntries + sizeof(uint32_t), _commonEncodingsOffset, _isBig); + // commonEncodingsArrayCount + write32(headerEntries + 2 * sizeof(uint32_t), numCommon, _isBig); + // personalityArraySectionOffset + write32(headerEntries + 3 * sizeof(uint32_t), _personalityArrayOffset, + _isBig); + // personalityArrayCount + write32(headerEntries + 4 * sizeof(uint32_t), numPersonalities, _isBig); + // indexSectionOffset + write32(headerEntries + 5 * sizeof(uint32_t), _topLevelIndexOffset, _isBig); + // indexCount + write32(headerEntries + 6 * sizeof(uint32_t), numPages + 1, _isBig); + } + + /// Add the list of common encodings to the section; this is simply an array + /// of uint32_t compact values. Size has already been specified in the header. + void addCommonEncodings(std::vector<uint32_t> &commonEncodings) { + using normalized::write32; + + _contents.resize(_commonEncodingsOffset + + commonEncodings.size() * sizeof(uint32_t)); + uint8_t *commonEncodingsArea = + reinterpret_cast<uint8_t *>(_contents.data() + _commonEncodingsOffset); + + for (uint32_t encoding : commonEncodings) { + write32(commonEncodingsArea, encoding, _isBig); + commonEncodingsArea += sizeof(uint32_t); + } + } + + void addPersonalityFunctions(std::vector<const Atom *> personalities) { + _contents.resize(_personalityArrayOffset + + personalities.size() * sizeof(uint32_t)); + + for (unsigned i = 0; i < personalities.size(); ++i) + addImageReferenceIndirect(_personalityArrayOffset + i * sizeof(uint32_t), + personalities[i]); + } + + void addTopLevelIndexes(std::vector<UnwindInfoPage> &pages) { + using normalized::write32; + + uint32_t numIndexes = pages.size() + 1; + _contents.resize(_topLevelIndexOffset + numIndexes * 3 * sizeof(uint32_t)); + + uint32_t pageLoc = _firstPageOffset; + + // 
The most difficult job here is calculating the LSDAs; everything else + // follows fairly naturally, but we can't state where the first + uint8_t *indexData = &_contents[_topLevelIndexOffset]; + uint32_t numLSDAs = 0; + for (unsigned i = 0; i < pages.size(); ++i) { + // functionOffset + addImageReference(_topLevelIndexOffset + 3 * i * sizeof(uint32_t), + pages[i].entries[0].rangeStart); + // secondLevelPagesSectionOffset + write32(indexData + (3 * i + 1) * sizeof(uint32_t), pageLoc, _isBig); + write32(indexData + (3 * i + 2) * sizeof(uint32_t), + _lsdaIndexOffset + numLSDAs * 2 * sizeof(uint32_t), _isBig); + + for (auto &entry : pages[i].entries) + if (entry.lsdaLocation) + ++numLSDAs; + } + + // Finally, write out the final sentinel index + auto &finalEntry = pages[pages.size() - 1].entries.back(); + addImageReference(_topLevelIndexOffset + + 3 * pages.size() * sizeof(uint32_t), + finalEntry.rangeStart, finalEntry.rangeLength); + // secondLevelPagesSectionOffset => 0 + write32(indexData + (3 * pages.size() + 2) * sizeof(uint32_t), + _lsdaIndexOffset + numLSDAs * 2 * sizeof(uint32_t), _isBig); + } + + void addLSDAIndexes(std::vector<UnwindInfoPage> &pages, uint32_t numLSDAs) { + _contents.resize(_lsdaIndexOffset + numLSDAs * 2 * sizeof(uint32_t)); + + uint32_t curOffset = _lsdaIndexOffset; + for (auto &page : pages) { + for (auto &entry : page.entries) { + if (!entry.lsdaLocation) + continue; + + addImageReference(curOffset, entry.rangeStart); + addImageReference(curOffset + sizeof(uint32_t), entry.lsdaLocation); + curOffset += 2 * sizeof(uint32_t); + } + } + } + + void addSecondLevelPages(std::vector<UnwindInfoPage> &pages) { + for (auto &page : pages) { + addRegularSecondLevelPage(page); + } + } + + void addRegularSecondLevelPage(const UnwindInfoPage &page) { + uint32_t curPageOffset = _contents.size(); + const int16_t headerSize = sizeof(uint32_t) + 2 * sizeof(uint16_t); + uint32_t curPageSize = + headerSize + 2 * page.entries.size() * sizeof(uint32_t); + 
_contents.resize(curPageOffset + curPageSize); + + using normalized::write32; + using normalized::write16; + // 2 => regular page + write32(&_contents[curPageOffset], 2, _isBig); + // offset of 1st entry + write16(&_contents[curPageOffset + 4], headerSize, _isBig); + write16(&_contents[curPageOffset + 6], page.entries.size(), _isBig); + + uint32_t pagePos = curPageOffset + headerSize; + for (auto &entry : page.entries) { + addImageReference(pagePos, entry.rangeStart); + + write32(_contents.data() + pagePos + sizeof(uint32_t), entry.encoding, + _isBig); + if ((entry.encoding & 0x0f000000U) == + _archHandler.dwarfCompactUnwindType()) + addEhFrameReference(pagePos + sizeof(uint32_t), entry.ehFrame); + + pagePos += 2 * sizeof(uint32_t); + } + } + + void addEhFrameReference(uint32_t offset, const Atom *dest, + Reference::Addend addend = 0) { + addReference(Reference::KindNamespace::mach_o, _archHandler.kindArch(), + _archHandler.unwindRefToEhFrameKind(), offset, dest, addend); + } + + void addImageReference(uint32_t offset, const Atom *dest, + Reference::Addend addend = 0) { + addReference(Reference::KindNamespace::mach_o, _archHandler.kindArch(), + _archHandler.imageOffsetKind(), offset, dest, addend); + } + + void addImageReferenceIndirect(uint32_t offset, const Atom *dest) { + addReference(Reference::KindNamespace::mach_o, _archHandler.kindArch(), + _archHandler.imageOffsetKindIndirect(), offset, dest, 0); + } + +private: + mach_o::ArchHandler &_archHandler; + std::vector<uint8_t> _contents; + uint32_t _commonEncodingsOffset; + uint32_t _personalityArrayOffset; + uint32_t _topLevelIndexOffset; + uint32_t _lsdaIndexOffset; + uint32_t _firstPageOffset; + bool _isBig; +}; + +/// Pass for instantiating and optimizing GOT slots. 
+/// +class CompactUnwindPass : public Pass { +public: + CompactUnwindPass(const MachOLinkingContext &context) + : _ctx(context), _archHandler(_ctx.archHandler()), + _file(*_ctx.make_file<MachOFile>("<mach-o Compact Unwind Pass>")), + _isBig(MachOLinkingContext::isBigEndian(_ctx.arch())) { + _file.setOrdinal(_ctx.getNextOrdinalAndIncrement()); + } + +private: + llvm::Error perform(SimpleFile &mergedFile) override { + DEBUG(llvm::dbgs() << "MachO Compact Unwind pass\n"); + + std::map<const Atom *, CompactUnwindEntry> unwindLocs; + std::map<const Atom *, const Atom *> dwarfFrames; + std::vector<const Atom *> personalities; + uint32_t numLSDAs = 0; + + // First collect all __compact_unwind and __eh_frame entries, addressable by + // the function referred to. + collectCompactUnwindEntries(mergedFile, unwindLocs, personalities, + numLSDAs); + + collectDwarfFrameEntries(mergedFile, dwarfFrames); + + // Skip rest of pass if no unwind info. + if (unwindLocs.empty() && dwarfFrames.empty()) + return llvm::Error(); + + // FIXME: if there are more than 4 personality functions then we need to + // defer to DWARF info for the ones we don't put in the list. They should + // also probably be sorted by frequency. + assert(personalities.size() <= 4); + + // TODO: Find commmon encodings for use by compressed pages. + std::vector<uint32_t> commonEncodings; + + // Now sort the entries by final address and fixup the compact encoding to + // its final form (i.e. set personality function bits & create DWARF + // references where needed). + std::vector<CompactUnwindEntry> unwindInfos = createUnwindInfoEntries( + mergedFile, unwindLocs, personalities, dwarfFrames); + + // Remove any unused eh-frame atoms. + pruneUnusedEHFrames(mergedFile, unwindInfos, unwindLocs, dwarfFrames); + + // Finally, we can start creating pages based on these entries. 
+ + DEBUG(llvm::dbgs() << " Splitting entries into pages\n"); + // FIXME: we split the entries into pages naively: lots of 4k pages followed + // by a small one. ld64 tried to minimize space and align them to real 4k + // boundaries. That might be worth doing, or perhaps we could perform some + // minor balancing for expected number of lookups. + std::vector<UnwindInfoPage> pages; + auto remainingInfos = llvm::makeArrayRef(unwindInfos); + do { + pages.push_back(UnwindInfoPage()); + + // FIXME: we only create regular pages at the moment. These can hold up to + // 1021 entries according to the documentation. + unsigned entriesInPage = std::min(1021U, (unsigned)remainingInfos.size()); + + pages.back().entries = remainingInfos.slice(0, entriesInPage); + remainingInfos = remainingInfos.slice(entriesInPage); + + DEBUG(llvm::dbgs() + << " Page from " << pages.back().entries[0].rangeStart->name() + << " to " << pages.back().entries.back().rangeStart->name() << " + " + << llvm::format("0x%x", pages.back().entries.back().rangeLength) + << " has " << entriesInPage << " entries\n"); + } while (!remainingInfos.empty()); + + auto *unwind = new (_file.allocator()) + UnwindInfoAtom(_archHandler, _file, _isBig, personalities, + commonEncodings, pages, numLSDAs); + mergedFile.addAtom(*unwind); + + // Finally, remove all __compact_unwind atoms now that we've processed them. 
+ mergedFile.removeDefinedAtomsIf([](const DefinedAtom *atom) { + return atom->contentType() == DefinedAtom::typeCompactUnwindInfo; + }); + + return llvm::Error(); + } + + void collectCompactUnwindEntries( + const SimpleFile &mergedFile, + std::map<const Atom *, CompactUnwindEntry> &unwindLocs, + std::vector<const Atom *> &personalities, uint32_t &numLSDAs) { + DEBUG(llvm::dbgs() << " Collecting __compact_unwind entries\n"); + + for (const DefinedAtom *atom : mergedFile.defined()) { + if (atom->contentType() != DefinedAtom::typeCompactUnwindInfo) + continue; + + auto unwindEntry = extractCompactUnwindEntry(atom); + unwindLocs.insert(std::make_pair(unwindEntry.rangeStart, unwindEntry)); + + DEBUG(llvm::dbgs() << " Entry for " << unwindEntry.rangeStart->name() + << ", encoding=" + << llvm::format("0x%08x", unwindEntry.encoding)); + if (unwindEntry.personalityFunction) + DEBUG(llvm::dbgs() << ", personality=" + << unwindEntry.personalityFunction->name() + << ", lsdaLoc=" << unwindEntry.lsdaLocation->name()); + DEBUG(llvm::dbgs() << '\n'); + + // Count number of LSDAs we see, since we need to know how big the index + // will be while laying out the section. + if (unwindEntry.lsdaLocation) + ++numLSDAs; + + // Gather the personality functions now, so that they're in deterministic + // order (derived from the DefinedAtom order). + if (unwindEntry.personalityFunction) { + auto pFunc = std::find(personalities.begin(), personalities.end(), + unwindEntry.personalityFunction); + if (pFunc == personalities.end()) + personalities.push_back(unwindEntry.personalityFunction); + } + } + } + + CompactUnwindEntry extractCompactUnwindEntry(const DefinedAtom *atom) { + CompactUnwindEntry entry; + + for (const Reference *ref : *atom) { + switch (ref->offsetInAtom()) { + case 0: + // FIXME: there could legitimately be functions with multiple encoding + // entries. However, nothing produces them at the moment. 
+ assert(ref->addend() == 0 && "unexpected offset into function"); + entry.rangeStart = ref->target(); + break; + case 0x10: + assert(ref->addend() == 0 && "unexpected offset into personality fn"); + entry.personalityFunction = ref->target(); + break; + case 0x18: + assert(ref->addend() == 0 && "unexpected offset into LSDA atom"); + entry.lsdaLocation = ref->target(); + break; + } + } + + if (atom->rawContent().size() < 4 * sizeof(uint32_t)) + return entry; + + using normalized::read32; + entry.rangeLength = + read32(atom->rawContent().data() + 2 * sizeof(uint32_t), _isBig); + entry.encoding = + read32(atom->rawContent().data() + 3 * sizeof(uint32_t), _isBig); + return entry; + } + + void + collectDwarfFrameEntries(const SimpleFile &mergedFile, + std::map<const Atom *, const Atom *> &dwarfFrames) { + for (const DefinedAtom *ehFrameAtom : mergedFile.defined()) { + if (ehFrameAtom->contentType() != DefinedAtom::typeCFI) + continue; + if (ArchHandler::isDwarfCIE(_isBig, ehFrameAtom)) + continue; + + if (const Atom *function = _archHandler.fdeTargetFunction(ehFrameAtom)) + dwarfFrames[function] = ehFrameAtom; + } + } + + /// Every atom defined in __TEXT,__text needs an entry in the final + /// __unwind_info section (in order). These comes from two sources: + /// + Input __compact_unwind sections where possible (after adding the + /// personality function offset which is only known now). + /// + A synthesised reference to __eh_frame if there's no __compact_unwind + /// or too many personality functions to be accommodated. 
+ std::vector<CompactUnwindEntry> createUnwindInfoEntries( + const SimpleFile &mergedFile, + const std::map<const Atom *, CompactUnwindEntry> &unwindLocs, + const std::vector<const Atom *> &personalities, + const std::map<const Atom *, const Atom *> &dwarfFrames) { + std::vector<CompactUnwindEntry> unwindInfos; + + DEBUG(llvm::dbgs() << " Creating __unwind_info entries\n"); + // The final order in the __unwind_info section must be derived from the + // order of typeCode atoms, since that's how they'll be put into the object + // file eventually (yuck!). + for (const DefinedAtom *atom : mergedFile.defined()) { + if (atom->contentType() != DefinedAtom::typeCode) + continue; + + unwindInfos.push_back(finalizeUnwindInfoEntryForAtom( + atom, unwindLocs, personalities, dwarfFrames)); + + DEBUG(llvm::dbgs() << " Entry for " << atom->name() + << ", final encoding=" + << llvm::format("0x%08x", unwindInfos.back().encoding) + << '\n'); + } + + return unwindInfos; + } + + /// Remove unused EH frames. + /// + /// An EH frame is considered unused if there is a corresponding compact + /// unwind atom that doesn't require the EH frame. + void pruneUnusedEHFrames( + SimpleFile &mergedFile, + const std::vector<CompactUnwindEntry> &unwindInfos, + const std::map<const Atom *, CompactUnwindEntry> &unwindLocs, + const std::map<const Atom *, const Atom *> &dwarfFrames) { + + // Worklist of all 'used' FDEs. + std::vector<const DefinedAtom *> usedDwarfWorklist; + + // We have to check two conditions when building the worklist: + // (1) EH frames used by compact unwind entries. + for (auto &entry : unwindInfos) + if (entry.ehFrame) + usedDwarfWorklist.push_back(cast<DefinedAtom>(entry.ehFrame)); + + // (2) EH frames that reference functions with no corresponding compact + // unwind info. 
+ for (auto &entry : dwarfFrames) + if (!unwindLocs.count(entry.first)) + usedDwarfWorklist.push_back(cast<DefinedAtom>(entry.second)); + + // Add all transitively referenced CFI atoms by processing the worklist. + std::set<const Atom *> usedDwarfFrames; + while (!usedDwarfWorklist.empty()) { + const DefinedAtom *cfiAtom = usedDwarfWorklist.back(); + usedDwarfWorklist.pop_back(); + usedDwarfFrames.insert(cfiAtom); + for (const auto *ref : *cfiAtom) { + const DefinedAtom *cfiTarget = dyn_cast<DefinedAtom>(ref->target()); + if (cfiTarget->contentType() == DefinedAtom::typeCFI) + usedDwarfWorklist.push_back(cfiTarget); + } + } + + // Finally, delete all unreferenced CFI atoms. + mergedFile.removeDefinedAtomsIf([&](const DefinedAtom *atom) { + if ((atom->contentType() == DefinedAtom::typeCFI) && + !usedDwarfFrames.count(atom)) + return true; + return false; + }); + } + + CompactUnwindEntry finalizeUnwindInfoEntryForAtom( + const DefinedAtom *function, + const std::map<const Atom *, CompactUnwindEntry> &unwindLocs, + const std::vector<const Atom *> &personalities, + const std::map<const Atom *, const Atom *> &dwarfFrames) { + auto unwindLoc = unwindLocs.find(function); + + CompactUnwindEntry entry; + if (unwindLoc == unwindLocs.end()) { + // Default entry has correct encoding (0 => no unwind), but we need to + // synthesise the function. + entry.rangeStart = function; + entry.rangeLength = function->size(); + } else + entry = unwindLoc->second; + + + // If there's no __compact_unwind entry, or it explicitly says to use + // __eh_frame, we need to try and fill in the correct DWARF atom. 
+ if (entry.encoding == _archHandler.dwarfCompactUnwindType() || + entry.encoding == 0) { + auto dwarfFrame = dwarfFrames.find(function); + if (dwarfFrame != dwarfFrames.end()) { + entry.encoding = _archHandler.dwarfCompactUnwindType(); + entry.ehFrame = dwarfFrame->second; + } + } + + auto personality = std::find(personalities.begin(), personalities.end(), + entry.personalityFunction); + uint32_t personalityIdx = personality == personalities.end() + ? 0 + : personality - personalities.begin() + 1; + + // FIXME: We should also use DWARF when there isn't enough room for the + // personality function in the compact encoding. + assert(personalityIdx < 4 && "too many personality functions"); + + entry.encoding |= personalityIdx << 28; + + if (entry.lsdaLocation) + entry.encoding |= 1U << 30; + + return entry; + } + + const MachOLinkingContext &_ctx; + mach_o::ArchHandler &_archHandler; + MachOFile &_file; + bool _isBig; +}; + +void addCompactUnwindPass(PassManager &pm, const MachOLinkingContext &ctx) { + assert(ctx.needsCompactUnwindPass()); + pm.add(llvm::make_unique<CompactUnwindPass>(ctx)); +} + +} // end namesapce mach_o +} // end namesapce lld diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/ExecutableAtoms.h b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/ExecutableAtoms.h new file mode 100644 index 00000000000..acced33b7e7 --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/ExecutableAtoms.h @@ -0,0 +1,155 @@ +//===- lib/ReaderWriter/MachO/ExecutableAtoms.h ---------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_EXECUTABLE_ATOMS_H +#define LLD_READER_WRITER_MACHO_EXECUTABLE_ATOMS_H + +#include "Atoms.h" +#include "File.h" + +#include "llvm/Support/MachO.h" + +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/LinkingContext.h" +#include "lld/Core/Reference.h" +#include "lld/Core/Simple.h" +#include "lld/Core/UndefinedAtom.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" + +namespace lld { +namespace mach_o { + + +// +// CEntryFile adds an UndefinedAtom for "_main" so that the Resolving +// phase will fail if "_main" is undefined. +// +class CEntryFile : public SimpleFile { +public: + CEntryFile(const MachOLinkingContext &context) + : SimpleFile("C entry", kindCEntryObject), + _undefMain(*this, context.entrySymbolName()) { + this->addAtom(_undefMain); + } + +private: + SimpleUndefinedAtom _undefMain; +}; + + +// +// StubHelperFile adds an UndefinedAtom for "dyld_stub_binder" so that +// the Resolveing phase will fail if "dyld_stub_binder" is undefined. +// +class StubHelperFile : public SimpleFile { +public: + StubHelperFile(const MachOLinkingContext &context) + : SimpleFile("stub runtime", kindStubHelperObject), + _undefBinder(*this, context.binderSymbolName()) { + this->addAtom(_undefBinder); + } + +private: + SimpleUndefinedAtom _undefBinder; +}; + + +// +// MachHeaderAliasFile lazily instantiates the magic symbols that mark the start +// of the mach_header for final linked images. 
+// +class MachHeaderAliasFile : public SimpleFile { +public: + MachHeaderAliasFile(const MachOLinkingContext &context) + : SimpleFile("mach_header symbols", kindHeaderObject) { + StringRef machHeaderSymbolName; + DefinedAtom::Scope symbolScope = DefinedAtom::scopeLinkageUnit; + StringRef dsoHandleName; + switch (context.outputMachOType()) { + case llvm::MachO::MH_OBJECT: + machHeaderSymbolName = "__mh_object_header"; + break; + case llvm::MachO::MH_EXECUTE: + machHeaderSymbolName = "__mh_execute_header"; + symbolScope = DefinedAtom::scopeGlobal; + dsoHandleName = "___dso_handle"; + break; + case llvm::MachO::MH_FVMLIB: + llvm_unreachable("no mach_header symbol for file type"); + case llvm::MachO::MH_CORE: + llvm_unreachable("no mach_header symbol for file type"); + case llvm::MachO::MH_PRELOAD: + llvm_unreachable("no mach_header symbol for file type"); + case llvm::MachO::MH_DYLIB: + machHeaderSymbolName = "__mh_dylib_header"; + dsoHandleName = "___dso_handle"; + break; + case llvm::MachO::MH_DYLINKER: + machHeaderSymbolName = "__mh_dylinker_header"; + dsoHandleName = "___dso_handle"; + break; + case llvm::MachO::MH_BUNDLE: + machHeaderSymbolName = "__mh_bundle_header"; + dsoHandleName = "___dso_handle"; + break; + case llvm::MachO::MH_DYLIB_STUB: + llvm_unreachable("no mach_header symbol for file type"); + case llvm::MachO::MH_DSYM: + llvm_unreachable("no mach_header symbol for file type"); + case llvm::MachO::MH_KEXT_BUNDLE: + dsoHandleName = "___dso_handle"; + break; + } + if (!machHeaderSymbolName.empty()) + _definedAtoms.push_back(new (allocator()) MachODefinedAtom( + *this, machHeaderSymbolName, symbolScope, + DefinedAtom::typeMachHeader, DefinedAtom::mergeNo, false, + true /* noDeadStrip */, + ArrayRef<uint8_t>(), DefinedAtom::Alignment(4096))); + + if (!dsoHandleName.empty()) + _definedAtoms.push_back(new (allocator()) MachODefinedAtom( + *this, dsoHandleName, DefinedAtom::scopeLinkageUnit, + DefinedAtom::typeDSOHandle, DefinedAtom::mergeNo, false, + true 
/* noDeadStrip */, + ArrayRef<uint8_t>(), DefinedAtom::Alignment(1))); + } + + const AtomRange<DefinedAtom> defined() const override { + return _definedAtoms; + } + const AtomRange<UndefinedAtom> undefined() const override { + return _noUndefinedAtoms; + } + + const AtomRange<SharedLibraryAtom> sharedLibrary() const override { + return _noSharedLibraryAtoms; + } + + const AtomRange<AbsoluteAtom> absolute() const override { + return _noAbsoluteAtoms; + } + + void clearAtoms() override { + _definedAtoms.clear(); + _noUndefinedAtoms.clear(); + _noSharedLibraryAtoms.clear(); + _noAbsoluteAtoms.clear(); + } + + +private: + mutable AtomVector<DefinedAtom> _definedAtoms; +}; + +} // namespace mach_o +} // namespace lld + +#endif // LLD_READER_WRITER_MACHO_EXECUTABLE_ATOMS_H diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/File.h b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/File.h new file mode 100644 index 00000000000..64a0fcf8284 --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/File.h @@ -0,0 +1,386 @@ +//===- lib/ReaderWriter/MachO/File.h ----------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_FILE_H +#define LLD_READER_WRITER_MACHO_FILE_H + +#include "Atoms.h" +#include "MachONormalizedFile.h" +#include "lld/Core/SharedLibraryFile.h" +#include "lld/Core/Simple.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" +#include <unordered_map> + +namespace lld { +namespace mach_o { + +using lld::mach_o::normalized::Section; + +class MachOFile : public SimpleFile { +public: + MachOFile(std::unique_ptr<MemoryBuffer> mb, MachOLinkingContext *ctx) + : SimpleFile(mb->getBufferIdentifier(), File::kindMachObject), + _mb(std::move(mb)), _ctx(ctx) {} + + MachOFile(StringRef path) : SimpleFile(path, File::kindMachObject) {} + + void addDefinedAtom(StringRef name, Atom::Scope scope, + DefinedAtom::ContentType type, DefinedAtom::Merge merge, + uint64_t sectionOffset, uint64_t contentSize, bool thumb, + bool noDeadStrip, bool copyRefs, + const Section *inSection) { + assert(sectionOffset+contentSize <= inSection->content.size()); + ArrayRef<uint8_t> content = inSection->content.slice(sectionOffset, + contentSize); + if (copyRefs) { + // Make a copy of the atom's name and content that is owned by this file. 
+ name = name.copy(allocator()); + content = content.copy(allocator()); + } + DefinedAtom::Alignment align( + inSection->alignment, + sectionOffset % inSection->alignment); + auto *atom = + new (allocator()) MachODefinedAtom(*this, name, scope, type, merge, + thumb, noDeadStrip, content, align); + addAtomForSection(inSection, atom, sectionOffset); + } + + void addDefinedAtomInCustomSection(StringRef name, Atom::Scope scope, + DefinedAtom::ContentType type, DefinedAtom::Merge merge, + bool thumb, bool noDeadStrip, uint64_t sectionOffset, + uint64_t contentSize, StringRef sectionName, + bool copyRefs, const Section *inSection) { + assert(sectionOffset+contentSize <= inSection->content.size()); + ArrayRef<uint8_t> content = inSection->content.slice(sectionOffset, + contentSize); + if (copyRefs) { + // Make a copy of the atom's name and content that is owned by this file. + name = name.copy(allocator()); + content = content.copy(allocator()); + sectionName = sectionName.copy(allocator()); + } + DefinedAtom::Alignment align( + inSection->alignment, + sectionOffset % inSection->alignment); + auto *atom = + new (allocator()) MachODefinedCustomSectionAtom(*this, name, scope, type, + merge, thumb, + noDeadStrip, content, + sectionName, align); + addAtomForSection(inSection, atom, sectionOffset); + } + + void addZeroFillDefinedAtom(StringRef name, Atom::Scope scope, + uint64_t sectionOffset, uint64_t size, + bool noDeadStrip, bool copyRefs, + const Section *inSection) { + if (copyRefs) { + // Make a copy of the atom's name and content that is owned by this file. 
+ name = name.copy(allocator()); + } + DefinedAtom::Alignment align( + inSection->alignment, + sectionOffset % inSection->alignment); + + DefinedAtom::ContentType type = DefinedAtom::typeUnknown; + switch (inSection->type) { + case llvm::MachO::S_ZEROFILL: + type = DefinedAtom::typeZeroFill; + break; + case llvm::MachO::S_THREAD_LOCAL_ZEROFILL: + type = DefinedAtom::typeTLVInitialZeroFill; + break; + default: + llvm_unreachable("Unrecognized zero-fill section"); + } + + auto *atom = + new (allocator()) MachODefinedAtom(*this, name, scope, type, size, + noDeadStrip, align); + addAtomForSection(inSection, atom, sectionOffset); + } + + void addUndefinedAtom(StringRef name, bool copyRefs) { + if (copyRefs) { + // Make a copy of the atom's name that is owned by this file. + name = name.copy(allocator()); + } + auto *atom = new (allocator()) SimpleUndefinedAtom(*this, name); + addAtom(*atom); + _undefAtoms[name] = atom; + } + + void addTentativeDefAtom(StringRef name, Atom::Scope scope, uint64_t size, + DefinedAtom::Alignment align, bool copyRefs) { + if (copyRefs) { + // Make a copy of the atom's name that is owned by this file. + name = name.copy(allocator()); + } + auto *atom = + new (allocator()) MachOTentativeDefAtom(*this, name, scope, size, align); + addAtom(*atom); + _undefAtoms[name] = atom; + } + + /// Search this file for an the atom from 'section' that covers + /// 'offsetInSect'. Returns nullptr is no atom found. + MachODefinedAtom *findAtomCoveringAddress(const Section §ion, + uint64_t offsetInSect, + uint32_t *foundOffsetAtom=nullptr) { + const auto &pos = _sectionAtoms.find(§ion); + if (pos == _sectionAtoms.end()) + return nullptr; + const auto &vec = pos->second; + assert(offsetInSect < section.content.size()); + // Vector of atoms for section are already sorted, so do binary search. 
+ const auto &atomPos = std::lower_bound(vec.begin(), vec.end(), offsetInSect, + [offsetInSect](const SectionOffsetAndAtom &ao, + uint64_t targetAddr) -> bool { + // Each atom has a start offset of its slice of the + // section's content. This compare function must return true + // iff the atom's range is before the offset being searched for. + uint64_t atomsEndOffset = ao.offset+ao.atom->rawContent().size(); + return (atomsEndOffset <= offsetInSect); + }); + if (atomPos == vec.end()) + return nullptr; + if (foundOffsetAtom) + *foundOffsetAtom = offsetInSect - atomPos->offset; + return atomPos->atom; + } + + /// Searches this file for an UndefinedAtom named 'name'. Returns + /// nullptr is no such atom found. + const lld::Atom *findUndefAtom(StringRef name) { + auto pos = _undefAtoms.find(name); + if (pos == _undefAtoms.end()) + return nullptr; + return pos->second; + } + + typedef std::function<void (MachODefinedAtom* atom)> DefinedAtomVisitor; + + void eachDefinedAtom(DefinedAtomVisitor vistor) { + for (auto §AndAtoms : _sectionAtoms) { + for (auto &offAndAtom : sectAndAtoms.second) { + vistor(offAndAtom.atom); + } + } + } + + typedef std::function<void(MachODefinedAtom *atom, uint64_t offset)> + SectionAtomVisitor; + + void eachAtomInSection(const Section §ion, SectionAtomVisitor visitor) { + auto pos = _sectionAtoms.find(§ion); + if (pos == _sectionAtoms.end()) + return; + auto vec = pos->second; + + for (auto &offAndAtom : vec) + visitor(offAndAtom.atom, offAndAtom.offset); + } + + MachOLinkingContext::Arch arch() const { return _arch; } + void setArch(MachOLinkingContext::Arch arch) { _arch = arch; } + + MachOLinkingContext::OS OS() const { return _os; } + void setOS(MachOLinkingContext::OS os) { _os = os; } + + MachOLinkingContext::ObjCConstraint objcConstraint() const { + return _objcConstraint; + } + void setObjcConstraint(MachOLinkingContext::ObjCConstraint v) { + _objcConstraint = v; + } + + uint32_t minVersion() const { return _minVersion; } + void 
setMinVersion(uint32_t v) { _minVersion = v; } + + LoadCommandType minVersionLoadCommandKind() const { + return _minVersionLoadCommandKind; + } + void setMinVersionLoadCommandKind(LoadCommandType v) { + _minVersionLoadCommandKind = v; + } + + uint32_t swiftVersion() const { return _swiftVersion; } + void setSwiftVersion(uint32_t v) { _swiftVersion = v; } + + bool subsectionsViaSymbols() const { + return _flags & llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS; + } + void setFlags(normalized::FileFlags v) { _flags = v; } + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const File *F) { + return F->kind() == File::kindMachObject; + } + +protected: + std::error_code doParse() override { + // Convert binary file to normalized mach-o. + auto normFile = normalized::readBinary(_mb, _ctx->arch()); + if (auto ec = normFile.takeError()) + return llvm::errorToErrorCode(std::move(ec)); + // Convert normalized mach-o to atoms. + if (auto ec = normalized::normalizedObjectToAtoms(this, **normFile, false)) + return llvm::errorToErrorCode(std::move(ec)); + return std::error_code(); + } + +private: + struct SectionOffsetAndAtom { uint64_t offset; MachODefinedAtom *atom; }; + + void addAtomForSection(const Section *inSection, MachODefinedAtom* atom, + uint64_t sectionOffset) { + SectionOffsetAndAtom offAndAtom; + offAndAtom.offset = sectionOffset; + offAndAtom.atom = atom; + _sectionAtoms[inSection].push_back(offAndAtom); + addAtom(*atom); + } + + typedef llvm::DenseMap<const normalized::Section *, + std::vector<SectionOffsetAndAtom>> SectionToAtoms; + typedef llvm::StringMap<const lld::Atom *> NameToAtom; + + std::unique_ptr<MemoryBuffer> _mb; + MachOLinkingContext *_ctx; + SectionToAtoms _sectionAtoms; + NameToAtom _undefAtoms; + MachOLinkingContext::Arch _arch = MachOLinkingContext::arch_unknown; + MachOLinkingContext::OS _os = MachOLinkingContext::OS::unknown; + uint32_t _minVersion = 0; + LoadCommandType _minVersionLoadCommandKind = 
(LoadCommandType)0; + MachOLinkingContext::ObjCConstraint _objcConstraint = + MachOLinkingContext::objc_unknown; + uint32_t _swiftVersion = 0; + normalized::FileFlags _flags = llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS; +}; + +class MachODylibFile : public SharedLibraryFile { +public: + MachODylibFile(std::unique_ptr<MemoryBuffer> mb, MachOLinkingContext *ctx) + : SharedLibraryFile(mb->getBufferIdentifier()), + _mb(std::move(mb)), _ctx(ctx) {} + + MachODylibFile(StringRef path) : SharedLibraryFile(path) {} + + OwningAtomPtr<SharedLibraryAtom> exports(StringRef name) const override { + // Pass down _installName so that if this requested symbol + // is re-exported through this dylib, the SharedLibraryAtom's loadName() + // is this dylib installName and not the implementation dylib's. + // NOTE: isData is not needed for dylibs (it matters for static libs). + return exports(name, _installName); + } + + /// Adds symbol name that this dylib exports. The corresponding + /// SharedLibraryAtom is created lazily (since most symbols are not used). 
+ void addExportedSymbol(StringRef name, bool weakDef, bool copyRefs) { + if (copyRefs) { + name = name.copy(allocator()); + } + AtomAndFlags info(weakDef); + _nameToAtom[name] = info; + } + + void addReExportedDylib(StringRef dylibPath) { + _reExportedDylibs.emplace_back(dylibPath); + } + + StringRef installName() { return _installName; } + uint32_t currentVersion() { return _currentVersion; } + uint32_t compatVersion() { return _compatVersion; } + + void setInstallName(StringRef name) { _installName = name; } + void setCompatVersion(uint32_t version) { _compatVersion = version; } + void setCurrentVersion(uint32_t version) { _currentVersion = version; } + + typedef std::function<MachODylibFile *(StringRef)> FindDylib; + + void loadReExportedDylibs(FindDylib find) { + for (ReExportedDylib &entry : _reExportedDylibs) { + entry.file = find(entry.path); + } + } + + StringRef getDSOName() const override { return _installName; } + + std::error_code doParse() override { + // Convert binary file to normalized mach-o. + auto normFile = normalized::readBinary(_mb, _ctx->arch()); + if (auto ec = normFile.takeError()) + return llvm::errorToErrorCode(std::move(ec)); + // Convert normalized mach-o to atoms. + if (auto ec = normalized::normalizedDylibToAtoms(this, **normFile, false)) + return llvm::errorToErrorCode(std::move(ec)); + return std::error_code(); + } + +private: + OwningAtomPtr<SharedLibraryAtom> exports(StringRef name, + StringRef installName) const { + // First, check if requested symbol is directly implemented by this dylib. + auto entry = _nameToAtom.find(name); + if (entry != _nameToAtom.end()) { + // FIXME: Make this map a set and only used in assert builds. + // Note, its safe to assert here as the resolver is the only client of + // this API and it only requests exports for undefined symbols. + // If we return from here we are no longer undefined so we should never + // get here again. 
+ assert(!entry->second.atom && "Duplicate shared library export"); + bool weakDef = entry->second.weakDef; + auto *atom = new (allocator()) MachOSharedLibraryAtom(*this, name, + installName, + weakDef); + entry->second.atom = atom; + return atom; + } + + // Next, check if symbol is implemented in some re-exported dylib. + for (const ReExportedDylib &dylib : _reExportedDylibs) { + assert(dylib.file); + auto atom = dylib.file->exports(name, installName); + if (atom.get()) + return atom; + } + + // Symbol not exported or re-exported by this dylib. + return nullptr; + } + + struct ReExportedDylib { + ReExportedDylib(StringRef p) : path(p), file(nullptr) { } + StringRef path; + MachODylibFile *file; + }; + + struct AtomAndFlags { + AtomAndFlags() : atom(nullptr), weakDef(false) { } + AtomAndFlags(bool weak) : atom(nullptr), weakDef(weak) { } + const SharedLibraryAtom *atom; + bool weakDef; + }; + + std::unique_ptr<MemoryBuffer> _mb; + MachOLinkingContext *_ctx; + StringRef _installName; + uint32_t _currentVersion; + uint32_t _compatVersion; + std::vector<ReExportedDylib> _reExportedDylibs; + mutable std::unordered_map<StringRef, AtomAndFlags> _nameToAtom; +}; + +} // end namespace mach_o +} // end namespace lld + +#endif // LLD_READER_WRITER_MACHO_FILE_H diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/FlatNamespaceFile.h b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/FlatNamespaceFile.h new file mode 100644 index 00000000000..76d295841c9 --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/FlatNamespaceFile.h @@ -0,0 +1,61 @@ +//===- lib/ReaderWriter/MachO/FlatNamespaceFile.h -------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_FLAT_NAMESPACE_FILE_H +#define LLD_READER_WRITER_MACHO_FLAT_NAMESPACE_FILE_H + +#include "lld/Core/SharedLibraryFile.h" +#include "llvm/Support/Debug.h" + +namespace lld { +namespace mach_o { + +// +// A FlateNamespaceFile instance may be added as a resolution source of last +// resort, depending on how -flat_namespace and -undefined are set. +// +class FlatNamespaceFile : public SharedLibraryFile { +public: + FlatNamespaceFile(const MachOLinkingContext &context) + : SharedLibraryFile("flat namespace") { } + + OwningAtomPtr<SharedLibraryAtom> exports(StringRef name) const override { + return new (allocator()) MachOSharedLibraryAtom(*this, name, getDSOName(), + false); + } + + StringRef getDSOName() const override { return "flat-namespace"; } + + const AtomRange<DefinedAtom> defined() const override { + return _noDefinedAtoms; + } + const AtomRange<UndefinedAtom> undefined() const override { + return _noUndefinedAtoms; + } + + const AtomRange<SharedLibraryAtom> sharedLibrary() const override { + return _noSharedLibraryAtoms; + } + + const AtomRange<AbsoluteAtom> absolute() const override { + return _noAbsoluteAtoms; + } + + void clearAtoms() override { + _noDefinedAtoms.clear(); + _noUndefinedAtoms.clear(); + _noSharedLibraryAtoms.clear(); + _noAbsoluteAtoms.clear(); + } +}; + +} // namespace mach_o +} // namespace lld + +#endif // LLD_READER_WRITER_MACHO_FLAT_NAMESPACE_FILE_H diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/GOTPass.cpp b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/GOTPass.cpp new file mode 100644 index 00000000000..6cdca0a9e05 --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/GOTPass.cpp @@ -0,0 +1,184 @@ +//===- lib/ReaderWriter/MachO/GOTPass.cpp -----------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. 
// See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This linker pass transforms all GOT kind references to real references.
/// That is, in assembly you can write something like:
///     movq foo@GOTPCREL(%rip), %rax
/// which means you want to load a pointer to "foo" out of the GOT (Global
/// Offset Table). In the object file, the Atom containing this instruction
/// has a Reference whose target is an Atom named "foo" and the Reference
/// kind is a GOT load. The linker needs to instantiate a pointer-sized
/// GOT entry. This is done by creating a GOT Atom to represent that
/// pointer-sized data in this pass, altering the Atom graph so the Reference
/// now points to the GOT Atom entry (corresponding to "foo"), and changing the
/// Reference Kind to reflect that it is now pointing to a GOT entry (rather
/// than needing a GOT entry).
///
/// There is one optimization the linker can do here. If the target of the GOT
/// is in the same linkage unit and does not need to be interposable, and
/// the GOT use is just a load (not some other operation), this pass can
/// transform that load into an LEA (add). This optimizes away one memory load
/// which at runtime could stall the pipeline. This optimization only
/// works for architectures in which a (GOT) load instruction can be changed to
/// an LEA instruction that is the same size. The method isGOTAccess() should
/// only return true for "canBypassGOT" if this optimization is supported.
+/// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "File.h" +#include "MachOPasses.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/Reference.h" +#include "lld/Core/Simple.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" + +namespace lld { +namespace mach_o { + +// +// GOT Entry Atom created by the GOT pass. +// +class GOTEntryAtom : public SimpleDefinedAtom { +public: + GOTEntryAtom(const File &file, bool is64, StringRef name) + : SimpleDefinedAtom(file), _is64(is64), _name(name) { } + + ~GOTEntryAtom() override = default; + + ContentType contentType() const override { + return DefinedAtom::typeGOT; + } + + Alignment alignment() const override { + return _is64 ? 8 : 4; + } + + uint64_t size() const override { + return _is64 ? 8 : 4; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permRW_; + } + + ArrayRef<uint8_t> rawContent() const override { + static const uint8_t zeros[] = + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + return llvm::makeArrayRef(zeros, size()); + } + + StringRef slotName() const { + return _name; + } + +private: + const bool _is64; + StringRef _name; +}; + +/// Pass for instantiating and optimizing GOT slots. +/// +class GOTPass : public Pass { +public: + GOTPass(const MachOLinkingContext &context) + : _ctx(context), _archHandler(_ctx.archHandler()), + _file(*_ctx.make_file<MachOFile>("<mach-o GOT Pass>")) { + _file.setOrdinal(_ctx.getNextOrdinalAndIncrement()); + } + +private: + llvm::Error perform(SimpleFile &mergedFile) override { + // Scan all references in all atoms. + for (const DefinedAtom *atom : mergedFile.defined()) { + for (const Reference *ref : *atom) { + // Look at instructions accessing the GOT. 
+ bool canBypassGOT; + if (!_archHandler.isGOTAccess(*ref, canBypassGOT)) + continue; + const Atom *target = ref->target(); + assert(target != nullptr); + + if (!shouldReplaceTargetWithGOTAtom(target, canBypassGOT)) { + // Update reference kind to reflect that target is a direct accesss. + _archHandler.updateReferenceToGOT(ref, false); + } else { + // Replace the target with a reference to a GOT entry. + const DefinedAtom *gotEntry = makeGOTEntry(target); + const_cast<Reference *>(ref)->setTarget(gotEntry); + // Update reference kind to reflect that target is now a GOT entry. + _archHandler.updateReferenceToGOT(ref, true); + } + } + } + + // Sort and add all created GOT Atoms to master file + std::vector<const GOTEntryAtom *> entries; + entries.reserve(_targetToGOT.size()); + for (auto &it : _targetToGOT) + entries.push_back(it.second); + std::sort(entries.begin(), entries.end(), + [](const GOTEntryAtom *left, const GOTEntryAtom *right) { + return (left->slotName().compare(right->slotName()) < 0); + }); + for (const GOTEntryAtom *slot : entries) + mergedFile.addAtom(*slot); + + return llvm::Error(); + } + + bool shouldReplaceTargetWithGOTAtom(const Atom *target, bool canBypassGOT) { + // Accesses to shared library symbols must go through GOT. + if (isa<SharedLibraryAtom>(target)) + return true; + // Accesses to interposable symbols in same linkage unit must also go + // through GOT. + const DefinedAtom *defTarget = dyn_cast<DefinedAtom>(target); + if (defTarget != nullptr && + defTarget->interposable() != DefinedAtom::interposeNo) { + assert(defTarget->scope() != DefinedAtom::scopeTranslationUnit); + return true; + } + // Target does not require indirection. So, if instruction allows GOT to be + // by-passed, do that optimization and don't create GOT entry. 
+ return !canBypassGOT; + } + + const DefinedAtom *makeGOTEntry(const Atom *target) { + auto pos = _targetToGOT.find(target); + if (pos == _targetToGOT.end()) { + auto *gotEntry = new (_file.allocator()) + GOTEntryAtom(_file, _ctx.is64Bit(), target->name()); + _targetToGOT[target] = gotEntry; + const ArchHandler::ReferenceInfo &nlInfo = _archHandler.stubInfo(). + nonLazyPointerReferenceToBinder; + gotEntry->addReference(Reference::KindNamespace::mach_o, nlInfo.arch, + nlInfo.kind, 0, target, 0); + return gotEntry; + } + return pos->second; + } + + const MachOLinkingContext &_ctx; + mach_o::ArchHandler &_archHandler; + MachOFile &_file; + llvm::DenseMap<const Atom*, const GOTEntryAtom*> _targetToGOT; +}; + +void addGOTPass(PassManager &pm, const MachOLinkingContext &ctx) { + assert(ctx.needsGOTPass()); + pm.add(llvm::make_unique<GOTPass>(ctx)); +} + +} // end namesapce mach_o +} // end namesapce lld diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/LayoutPass.cpp b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/LayoutPass.cpp new file mode 100644 index 00000000000..dd2ee8567ec --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/LayoutPass.cpp @@ -0,0 +1,489 @@ +//===-- ReaderWriter/MachO/LayoutPass.cpp - Layout atoms ------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "LayoutPass.h" +#include "lld/Core/Instrumentation.h" +#include "lld/Core/Parallel.h" +#include "lld/Core/PassManager.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Debug.h" +#include <algorithm> +#include <set> +#include <utility> + +using namespace lld; + +#define DEBUG_TYPE "LayoutPass" + +namespace lld { +namespace mach_o { + +static bool compareAtoms(const LayoutPass::SortKey &, + const LayoutPass::SortKey &, + LayoutPass::SortOverride customSorter); + +#ifndef NDEBUG +// Return "reason (leftval, rightval)" +static std::string formatReason(StringRef reason, int leftVal, int rightVal) { + return (Twine(reason) + " (" + Twine(leftVal) + ", " + Twine(rightVal) + ")") + .str(); +} + +// Less-than relationship of two atoms must be transitive, which is, if a < b +// and b < c, a < c must be true. This function checks the transitivity by +// checking the sort results. +static void checkTransitivity(std::vector<LayoutPass::SortKey> &vec, + LayoutPass::SortOverride customSorter) { + for (auto i = vec.begin(), e = vec.end(); (i + 1) != e; ++i) { + for (auto j = i + 1; j != e; ++j) { + assert(compareAtoms(*i, *j, customSorter)); + assert(!compareAtoms(*j, *i, customSorter)); + } + } +} + +// Helper functions to check follow-on graph. 
+typedef llvm::DenseMap<const DefinedAtom *, const DefinedAtom *> AtomToAtomT; + +static std::string atomToDebugString(const Atom *atom) { + const DefinedAtom *definedAtom = dyn_cast<DefinedAtom>(atom); + std::string str; + llvm::raw_string_ostream s(str); + if (definedAtom->name().empty()) + s << "<anonymous " << definedAtom << ">"; + else + s << definedAtom->name(); + s << " in "; + if (definedAtom->customSectionName().empty()) + s << "<anonymous>"; + else + s << definedAtom->customSectionName(); + s.flush(); + return str; +} + +static void showCycleDetectedError(const Registry ®istry, + AtomToAtomT &followOnNexts, + const DefinedAtom *atom) { + const DefinedAtom *start = atom; + llvm::dbgs() << "There's a cycle in a follow-on chain!\n"; + do { + llvm::dbgs() << " " << atomToDebugString(atom) << "\n"; + for (const Reference *ref : *atom) { + StringRef kindValStr; + if (!registry.referenceKindToString(ref->kindNamespace(), ref->kindArch(), + ref->kindValue(), kindValStr)) { + kindValStr = "<unknown>"; + } + llvm::dbgs() << " " << kindValStr + << ": " << atomToDebugString(ref->target()) << "\n"; + } + atom = followOnNexts[atom]; + } while (atom != start); + llvm::report_fatal_error("Cycle detected"); +} + +/// Exit if there's a cycle in a followon chain reachable from the +/// given root atom. Uses the tortoise and hare algorithm to detect a +/// cycle. 
+static void checkNoCycleInFollowonChain(const Registry ®istry, + AtomToAtomT &followOnNexts, + const DefinedAtom *root) { + const DefinedAtom *tortoise = root; + const DefinedAtom *hare = followOnNexts[root]; + while (true) { + if (!tortoise || !hare) + return; + if (tortoise == hare) + showCycleDetectedError(registry, followOnNexts, tortoise); + tortoise = followOnNexts[tortoise]; + hare = followOnNexts[followOnNexts[hare]]; + } +} + +static void checkReachabilityFromRoot(AtomToAtomT &followOnRoots, + const DefinedAtom *atom) { + if (!atom) return; + auto i = followOnRoots.find(atom); + if (i == followOnRoots.end()) { + llvm_unreachable(((Twine("Atom <") + atomToDebugString(atom) + + "> has no follow-on root!")) + .str() + .c_str()); + } + const DefinedAtom *ap = i->second; + while (true) { + const DefinedAtom *next = followOnRoots[ap]; + if (!next) { + llvm_unreachable((Twine("Atom <" + atomToDebugString(atom) + + "> is not reachable from its root!")) + .str() + .c_str()); + } + if (next == ap) + return; + ap = next; + } +} + +static void printDefinedAtoms(const File::AtomRange<DefinedAtom> &atomRange) { + for (const DefinedAtom *atom : atomRange) { + llvm::dbgs() << " file=" << atom->file().path() + << ", name=" << atom->name() + << ", size=" << atom->size() + << ", type=" << atom->contentType() + << ", ordinal=" << atom->ordinal() + << "\n"; + } +} + +/// Verify that the followon chain is sane. Should not be called in +/// release binary. +void LayoutPass::checkFollowonChain(const File::AtomRange<DefinedAtom> &range) { + ScopedTask task(getDefaultDomain(), "LayoutPass::checkFollowonChain"); + + // Verify that there's no cycle in follow-on chain. + std::set<const DefinedAtom *> roots; + for (const auto &ai : _followOnRoots) + roots.insert(ai.second); + for (const DefinedAtom *root : roots) + checkNoCycleInFollowonChain(_registry, _followOnNexts, root); + + // Verify that all the atoms in followOnNexts have references to + // their roots. 
+ for (const auto &ai : _followOnNexts) { + checkReachabilityFromRoot(_followOnRoots, ai.first); + checkReachabilityFromRoot(_followOnRoots, ai.second); + } +} +#endif // #ifndef NDEBUG + +/// The function compares atoms by sorting atoms in the following order +/// a) Sorts atoms by their ordinal overrides (layout-after/ingroup) +/// b) Sorts atoms by their permissions +/// c) Sorts atoms by their content +/// d) Sorts atoms by custom sorter +/// e) Sorts atoms on how they appear using File Ordinality +/// f) Sorts atoms on how they appear within the File +static bool compareAtomsSub(const LayoutPass::SortKey &lc, + const LayoutPass::SortKey &rc, + LayoutPass::SortOverride customSorter, + std::string &reason) { + const DefinedAtom *left = lc._atom.get(); + const DefinedAtom *right = rc._atom.get(); + if (left == right) { + reason = "same"; + return false; + } + + // Find the root of the chain if it is a part of a follow-on chain. + const DefinedAtom *leftRoot = lc._root; + const DefinedAtom *rightRoot = rc._root; + + // Sort atoms by their ordinal overrides only if they fall in the same + // chain. + if (leftRoot == rightRoot) { + DEBUG(reason = formatReason("override", lc._override, rc._override)); + return lc._override < rc._override; + } + + // Sort same permissions together. + DefinedAtom::ContentPermissions leftPerms = leftRoot->permissions(); + DefinedAtom::ContentPermissions rightPerms = rightRoot->permissions(); + + if (leftPerms != rightPerms) { + DEBUG(reason = + formatReason("contentPerms", (int)leftPerms, (int)rightPerms)); + return leftPerms < rightPerms; + } + + // Sort same content types together. + DefinedAtom::ContentType leftType = leftRoot->contentType(); + DefinedAtom::ContentType rightType = rightRoot->contentType(); + + if (leftType != rightType) { + DEBUG(reason = formatReason("contentType", (int)leftType, (int)rightType)); + return leftType < rightType; + } + + // Use custom sorter if supplied. 
+ if (customSorter) { + bool leftBeforeRight; + if (customSorter(leftRoot, rightRoot, leftBeforeRight)) + return leftBeforeRight; + } + + // Sort by .o order. + const File *leftFile = &leftRoot->file(); + const File *rightFile = &rightRoot->file(); + + if (leftFile != rightFile) { + DEBUG(reason = formatReason(".o order", (int)leftFile->ordinal(), + (int)rightFile->ordinal())); + return leftFile->ordinal() < rightFile->ordinal(); + } + + // Sort by atom order with .o file. + uint64_t leftOrdinal = leftRoot->ordinal(); + uint64_t rightOrdinal = rightRoot->ordinal(); + + if (leftOrdinal != rightOrdinal) { + DEBUG(reason = formatReason("ordinal", (int)leftRoot->ordinal(), + (int)rightRoot->ordinal())); + return leftOrdinal < rightOrdinal; + } + + llvm::errs() << "Unordered: <" << left->name() << "> <" + << right->name() << ">\n"; + llvm_unreachable("Atoms with Same Ordinal!"); +} + +static bool compareAtoms(const LayoutPass::SortKey &lc, + const LayoutPass::SortKey &rc, + LayoutPass::SortOverride customSorter) { + std::string reason; + bool result = compareAtomsSub(lc, rc, customSorter, reason); + DEBUG({ + StringRef comp = result ? "<" : ">="; + llvm::dbgs() << "Layout: '" << lc._atom.get()->name() + << "' " << comp << " '" + << rc._atom.get()->name() << "' (" << reason << ")\n"; + }); + return result; +} + +LayoutPass::LayoutPass(const Registry ®istry, SortOverride sorter) + : _registry(registry), _customSorter(std::move(sorter)) {} + +// Returns the atom immediately followed by the given atom in the followon +// chain. +const DefinedAtom *LayoutPass::findAtomFollowedBy( + const DefinedAtom *targetAtom) { + // Start from the beginning of the chain and follow the chain until + // we find the targetChain. + const DefinedAtom *atom = _followOnRoots[targetAtom]; + while (true) { + const DefinedAtom *prevAtom = atom; + AtomToAtomT::iterator targetFollowOnAtomsIter = _followOnNexts.find(atom); + // The target atom must be in the chain of its root. 
+ assert(targetFollowOnAtomsIter != _followOnNexts.end()); + atom = targetFollowOnAtomsIter->second; + if (atom == targetAtom) + return prevAtom; + } +} + +// Check if all the atoms followed by the given target atom are of size zero. +// When this method is called, an atom being added is not of size zero and +// will be added to the head of the followon chain. All the atoms between the +// atom and the targetAtom (specified by layout-after) need to be of size zero +// in this case. Otherwise the desired layout is impossible. +bool LayoutPass::checkAllPrevAtomsZeroSize(const DefinedAtom *targetAtom) { + const DefinedAtom *atom = _followOnRoots[targetAtom]; + while (true) { + if (atom == targetAtom) + return true; + if (atom->size() != 0) + // TODO: print warning that an impossible layout is being desired by the + // user. + return false; + AtomToAtomT::iterator targetFollowOnAtomsIter = _followOnNexts.find(atom); + // The target atom must be in the chain of its root. + assert(targetFollowOnAtomsIter != _followOnNexts.end()); + atom = targetFollowOnAtomsIter->second; + } +} + +// Set the root of all atoms in targetAtom's chain to the given root. +void LayoutPass::setChainRoot(const DefinedAtom *targetAtom, + const DefinedAtom *root) { + // Walk through the followon chain and override each node's root. + while (true) { + _followOnRoots[targetAtom] = root; + AtomToAtomT::iterator targetFollowOnAtomsIter = + _followOnNexts.find(targetAtom); + if (targetFollowOnAtomsIter == _followOnNexts.end()) + return; + targetAtom = targetFollowOnAtomsIter->second; + } +} + +/// This pass builds the followon tables described by two DenseMaps +/// followOnRoots and followonNexts. 
/// The followOnRoots map contains a mapping of a DefinedAtom to its root
/// The followOnNexts map contains a mapping of what DefinedAtom follows the
/// current Atom
/// The algorithm follows a very simple approach
/// a) If the atom is first seen, then make that as the root atom
/// b) The targetAtom which this Atom contains, has the root thats set to the
///    root of the current atom
/// c) If the targetAtom is part of a different tree and the root of the
///    targetAtom is itself, Chain all the atoms that are contained in the tree
///    to the current Tree
/// d) If the targetAtom is part of a different chain and the root of the
///    targetAtom until the targetAtom has all atoms of size 0, then chain the
///    targetAtoms and its tree to the current chain
///
/// Only references in the `all` kind namespace with kind value
/// kindLayoutAfter are considered; every other reference is skipped.
void LayoutPass::buildFollowOnTable(const File::AtomRange<DefinedAtom> &range) {
  ScopedTask task(getDefaultDomain(), "LayoutPass::buildFollowOnTable");
  // Set the initial size of the followon and the followonNext hash to the
  // number of atoms that we have.
  _followOnRoots.reserve(range.size());
  _followOnNexts.reserve(range.size());
  for (const DefinedAtom *ai : range) {
    for (const Reference *r : *ai) {
      if (r->kindNamespace() != lld::Reference::KindNamespace::all ||
          r->kindValue() != lld::Reference::kindLayoutAfter)
        continue;
      // NOTE(review): dyn_cast yields null if the target is not a DefinedAtom;
      // presumably layout-after references always target defined atoms --
      // confirm.
      const DefinedAtom *targetAtom = dyn_cast<DefinedAtom>(r->target());
      _followOnNexts[ai] = targetAtom;

      // If we find a followon for the first time, let's make that atom as the
      // root atom.
      if (_followOnRoots.count(ai) == 0)
        _followOnRoots[ai] = ai;

      auto iter = _followOnRoots.find(targetAtom);
      if (iter == _followOnRoots.end()) {
        // If the targetAtom is not a root of any chain, let's make the root of
        // the targetAtom to the root of the current chain.

        // The expression m[i] = m[j] where m is a DenseMap and i != j is not
        // safe. m[j] returns a reference, which would be invalidated when a
        // rehashing occurs. If rehashing occurs to make room for m[i], m[j]
        // becomes invalid, and that invalid reference would be used as the RHS
        // value of the expression.
        // Copy the value to workaround.
        const DefinedAtom *tmp = _followOnRoots[ai];
        _followOnRoots[targetAtom] = tmp;
        continue;
      }
      if (iter->second == targetAtom) {
        // If the targetAtom is the root of a chain, the chain becomes part of
        // the current chain. Rewrite the subchain's root to the current
        // chain's root.
        setChainRoot(targetAtom, _followOnRoots[ai]);
        continue;
      }
      // The targetAtom is already a part of a chain. If the current atom is
      // of size zero, we can insert it in the middle of the chain just
      // before the target atom, while not breaking other atom's followon
      // relationships. If it's not, we can only insert the current atom at
      // the beginning of the chain. All the atoms followed by the target
      // atom must be of size zero in that case to satisfy the followon
      // relationships.
      size_t currentAtomSize = ai->size();
      if (currentAtomSize == 0) {
        const DefinedAtom *targetPrevAtom = findAtomFollowedBy(targetAtom);
        _followOnNexts[targetPrevAtom] = ai;
        // Same copy-before-assign workaround as above.
        const DefinedAtom *tmp = _followOnRoots[targetPrevAtom];
        _followOnRoots[ai] = tmp;
        continue;
      }
      // The requested layout is impossible.  This leaves the reference loop
      // for the current atom; the _followOnNexts[ai] entry written above is
      // kept as is.
      if (!checkAllPrevAtomsZeroSize(targetAtom))
        break;
      // Put the current atom in front of the target's chain: it now precedes
      // the old chain root, and the whole old chain adopts the current atom's
      // root.
      _followOnNexts[ai] = _followOnRoots[targetAtom];
      setChainRoot(_followOnRoots[targetAtom], _followOnRoots[ai]);
    }
  }
}

/// Build an ordinal override map by traversing the followon chain, and
/// assigning ordinals to each atom, if the atoms have their ordinals
/// already assigned skip the atom and move to the next.
This is the +/// main map thats used to sort the atoms while comparing two atoms together +void +LayoutPass::buildOrdinalOverrideMap(const File::AtomRange<DefinedAtom> &range) { + ScopedTask task(getDefaultDomain(), "LayoutPass::buildOrdinalOverrideMap"); + uint64_t index = 0; + for (const DefinedAtom *ai : range) { + const DefinedAtom *atom = ai; + if (_ordinalOverrideMap.find(atom) != _ordinalOverrideMap.end()) + continue; + AtomToAtomT::iterator start = _followOnRoots.find(atom); + if (start == _followOnRoots.end()) + continue; + for (const DefinedAtom *nextAtom = start->second; nextAtom; + nextAtom = _followOnNexts[nextAtom]) { + AtomToOrdinalT::iterator pos = _ordinalOverrideMap.find(nextAtom); + if (pos == _ordinalOverrideMap.end()) + _ordinalOverrideMap[nextAtom] = index++; + } + } +} + +std::vector<LayoutPass::SortKey> +LayoutPass::decorate(File::AtomRange<DefinedAtom> &atomRange) const { + std::vector<SortKey> ret; + for (OwningAtomPtr<DefinedAtom> &atom : atomRange.owning_ptrs()) { + auto ri = _followOnRoots.find(atom.get()); + auto oi = _ordinalOverrideMap.find(atom.get()); + const auto *root = (ri == _followOnRoots.end()) ? atom.get() : ri->second; + uint64_t override = (oi == _ordinalOverrideMap.end()) ? 0 : oi->second; + ret.push_back(SortKey(std::move(atom), root, override)); + } + return ret; +} + +void LayoutPass::undecorate(File::AtomRange<DefinedAtom> &atomRange, + std::vector<SortKey> &keys) const { + size_t i = 0; + for (SortKey &k : keys) + atomRange[i++] = std::move(k._atom); +} + +/// Perform the actual pass +llvm::Error LayoutPass::perform(SimpleFile &mergedFile) { + DEBUG(llvm::dbgs() << "******** Laying out atoms:\n"); + // sort the atoms + ScopedTask task(getDefaultDomain(), "LayoutPass"); + File::AtomRange<DefinedAtom> atomRange = mergedFile.defined(); + + // Build follow on tables + buildFollowOnTable(atomRange); + + // Check the structure of followon graph if running in debug mode. 
+ DEBUG(checkFollowonChain(atomRange)); + + // Build override maps + buildOrdinalOverrideMap(atomRange); + + DEBUG({ + llvm::dbgs() << "unsorted atoms:\n"; + printDefinedAtoms(atomRange); + }); + + std::vector<LayoutPass::SortKey> vec = decorate(atomRange); + parallel_sort(vec.begin(), vec.end(), + [&](const LayoutPass::SortKey &l, const LayoutPass::SortKey &r) -> bool { + return compareAtoms(l, r, _customSorter); + }); + DEBUG(checkTransitivity(vec, _customSorter)); + undecorate(atomRange, vec); + + DEBUG({ + llvm::dbgs() << "sorted atoms:\n"; + printDefinedAtoms(atomRange); + }); + + DEBUG(llvm::dbgs() << "******** Finished laying out atoms\n"); + return llvm::Error(); +} + +void addLayoutPass(PassManager &pm, const MachOLinkingContext &ctx) { + pm.add(llvm::make_unique<LayoutPass>( + ctx.registry(), [&](const DefinedAtom * left, const DefinedAtom * right, + bool & leftBeforeRight) ->bool { + return ctx.customAtomOrderer(left, right, leftBeforeRight); + })); +} + +} // namespace mach_o +} // namespace lld diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/LayoutPass.h b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/LayoutPass.h new file mode 100644 index 00000000000..c18777eded0 --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/LayoutPass.h @@ -0,0 +1,119 @@ +//===------ lib/ReaderWriter/MachO/LayoutPass.h - Handles Layout of atoms -===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_LAYOUT_PASS_H +#define LLD_READER_WRITER_MACHO_LAYOUT_PASS_H + +#include "lld/Core/File.h" +#include "lld/Core/Pass.h" +#include "lld/Core/Reader.h" +#include "lld/Core/Simple.h" +#include "llvm/ADT/DenseMap.h" +#include <map> +#include <string> +#include <vector> + +namespace lld { +class DefinedAtom; +class SimpleFile; + +namespace mach_o { + +/// This linker pass does the layout of the atoms. The pass is done after the +/// order their .o files were found on the command line, then by order of the +/// atoms (address) in the .o file. But some atoms have a preferred location +/// in their section (such as pinned to the start or end of the section), so +/// the sort must take that into account too. +class LayoutPass : public Pass { +public: + struct SortKey { + SortKey(OwningAtomPtr<DefinedAtom> &&atom, + const DefinedAtom *root, uint64_t override) + : _atom(std::move(atom)), _root(root), _override(override) {} + OwningAtomPtr<DefinedAtom> _atom; + const DefinedAtom *_root; + uint64_t _override; + + // Note, these are only here to appease MSVC bots which didn't like + // the same methods being implemented/deleted in OwningAtomPtr. + SortKey(SortKey &&key) : _atom(std::move(key._atom)), _root(key._root), + _override(key._override) { + key._root = nullptr; + } + + SortKey &operator=(SortKey &&key) { + _atom = std::move(key._atom); + _root = key._root; + key._root = nullptr; + _override = key._override; + return *this; + } + + private: + SortKey(const SortKey &) = delete; + void operator=(const SortKey&) = delete; + }; + + typedef std::function<bool (const DefinedAtom *left, const DefinedAtom *right, + bool &leftBeforeRight)> SortOverride; + + LayoutPass(const Registry ®istry, SortOverride sorter); + + /// Sorts atoms in mergedFile by content type then by command line order. 
+ llvm::Error perform(SimpleFile &mergedFile) override; + + ~LayoutPass() override = default; + +private: + // Build the followOn atoms chain as specified by the kindLayoutAfter + // reference type + void buildFollowOnTable(const File::AtomRange<DefinedAtom> &range); + + // Build a map of Atoms to ordinals for sorting the atoms + void buildOrdinalOverrideMap(const File::AtomRange<DefinedAtom> &range); + + const Registry &_registry; + SortOverride _customSorter; + + typedef llvm::DenseMap<const DefinedAtom *, const DefinedAtom *> AtomToAtomT; + typedef llvm::DenseMap<const DefinedAtom *, uint64_t> AtomToOrdinalT; + + // A map to be used to sort atoms. It represents the order of atoms in the + // result; if Atom X is mapped to atom Y in this map, X will be located + // immediately before Y in the output file. Y might be mapped to another + // atom, constructing a follow-on chain. An atom cannot be mapped to more + // than one atom unless all but one atom are of size zero. + AtomToAtomT _followOnNexts; + + // A map to be used to sort atoms. It's a map from an atom to its root of + // follow-on chain. A root atom is mapped to itself. If an atom is not in + // _followOnNexts, the atom is not in this map, and vice versa. + AtomToAtomT _followOnRoots; + + AtomToOrdinalT _ordinalOverrideMap; + + // Helper methods for buildFollowOnTable(). + const DefinedAtom *findAtomFollowedBy(const DefinedAtom *targetAtom); + bool checkAllPrevAtomsZeroSize(const DefinedAtom *targetAtom); + + void setChainRoot(const DefinedAtom *targetAtom, const DefinedAtom *root); + + std::vector<SortKey> decorate(File::AtomRange<DefinedAtom> &atomRange) const; + + void undecorate(File::AtomRange<DefinedAtom> &atomRange, + std::vector<SortKey> &keys) const; + + // Check if the follow-on graph is a correct structure. For debugging only. 
+ void checkFollowonChain(const File::AtomRange<DefinedAtom> &range); +}; + +} // namespace mach_o +} // namespace lld + +#endif // LLD_READER_WRITER_MACHO_LAYOUT_PASS_H diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachOLinkingContext.cpp b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachOLinkingContext.cpp new file mode 100644 index 00000000000..05375f145d3 --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachOLinkingContext.cpp @@ -0,0 +1,1117 @@ +//===- lib/ReaderWriter/MachO/MachOLinkingContext.cpp ---------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "ArchHandler.h" +#include "File.h" +#include "FlatNamespaceFile.h" +#include "MachONormalizedFile.h" +#include "MachOPasses.h" +#include "SectCreateFile.h" +#include "lld/Core/ArchiveLibraryFile.h" +#include "lld/Core/PassManager.h" +#include "lld/Core/Reader.h" +#include "lld/Core/Writer.h" +#include "lld/Driver/Driver.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Config/config.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/MachO.h" +#include "llvm/Support/Path.h" +#include <algorithm> + +#if defined(HAVE_CXXABI_H) +#include <cxxabi.h> +#endif + +using lld::mach_o::ArchHandler; +using lld::mach_o::MachOFile; +using lld::mach_o::MachODylibFile; +using namespace llvm::MachO; + +namespace lld { + +bool MachOLinkingContext::parsePackedVersion(StringRef str, uint32_t &result) { + result = 0; + + if (str.empty()) + return false; + + SmallVector<StringRef, 3> parts; + llvm::SplitString(str, parts, "."); + + unsigned long long num; + if (llvm::getAsUnsignedInteger(parts[0], 10, num)) + return 
true; + if (num > 65535) + return true; + result = num << 16; + + if (parts.size() > 1) { + if (llvm::getAsUnsignedInteger(parts[1], 10, num)) + return true; + if (num > 255) + return true; + result |= (num << 8); + } + + if (parts.size() > 2) { + if (llvm::getAsUnsignedInteger(parts[2], 10, num)) + return true; + if (num > 255) + return true; + result |= num; + } + + return false; +} + +bool MachOLinkingContext::parsePackedVersion(StringRef str, uint64_t &result) { + result = 0; + + if (str.empty()) + return false; + + SmallVector<StringRef, 5> parts; + llvm::SplitString(str, parts, "."); + + unsigned long long num; + if (llvm::getAsUnsignedInteger(parts[0], 10, num)) + return true; + if (num > 0xFFFFFF) + return true; + result = num << 40; + + unsigned Shift = 30; + for (StringRef str : llvm::makeArrayRef(parts).slice(1)) { + if (llvm::getAsUnsignedInteger(str, 10, num)) + return true; + if (num > 0x3FF) + return true; + result |= (num << Shift); + Shift -= 10; + } + + return false; +} + +MachOLinkingContext::ArchInfo MachOLinkingContext::_s_archInfos[] = { + { "x86_64", arch_x86_64, true, CPU_TYPE_X86_64, CPU_SUBTYPE_X86_64_ALL }, + { "i386", arch_x86, true, CPU_TYPE_I386, CPU_SUBTYPE_X86_ALL }, + { "ppc", arch_ppc, false, CPU_TYPE_POWERPC, CPU_SUBTYPE_POWERPC_ALL }, + { "armv6", arch_armv6, true, CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V6 }, + { "armv7", arch_armv7, true, CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7 }, + { "armv7s", arch_armv7s, true, CPU_TYPE_ARM, CPU_SUBTYPE_ARM_V7S }, + { "arm64", arch_arm64, true, CPU_TYPE_ARM64, CPU_SUBTYPE_ARM64_ALL }, + { "", arch_unknown,false, 0, 0 } +}; + +MachOLinkingContext::Arch +MachOLinkingContext::archFromCpuType(uint32_t cputype, uint32_t cpusubtype) { + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if ((info->cputype == cputype) && (info->cpusubtype == cpusubtype)) + return info->arch; + } + return arch_unknown; +} + +MachOLinkingContext::Arch +MachOLinkingContext::archFromName(StringRef archName) { + for 
(ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->archName.equals(archName)) + return info->arch; + } + return arch_unknown; +} + +StringRef MachOLinkingContext::nameFromArch(Arch arch) { + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->arch == arch) + return info->archName; + } + return "<unknown>"; +} + +uint32_t MachOLinkingContext::cpuTypeFromArch(Arch arch) { + assert(arch != arch_unknown); + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->arch == arch) + return info->cputype; + } + llvm_unreachable("Unknown arch type"); +} + +uint32_t MachOLinkingContext::cpuSubtypeFromArch(Arch arch) { + assert(arch != arch_unknown); + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->arch == arch) + return info->cpusubtype; + } + llvm_unreachable("Unknown arch type"); +} + +bool MachOLinkingContext::isThinObjectFile(StringRef path, Arch &arch) { + return mach_o::normalized::isThinObjectFile(path, arch); +} + +bool MachOLinkingContext::sliceFromFatFile(MemoryBufferRef mb, uint32_t &offset, + uint32_t &size) { + return mach_o::normalized::sliceFromFatFile(mb, _arch, offset, size); +} + +MachOLinkingContext::MachOLinkingContext() {} + +MachOLinkingContext::~MachOLinkingContext() { + // Atoms are allocated on BumpPtrAllocator's on File's. + // As we transfer atoms from one file to another, we need to clear all of the + // atoms before we remove any of the BumpPtrAllocator's. 
+ auto &nodes = getNodes(); + for (unsigned i = 0, e = nodes.size(); i != e; ++i) { + FileNode *node = dyn_cast<FileNode>(nodes[i].get()); + if (!node) + continue; + File *file = node->getFile(); + file->clearAtoms(); + } +} + +void MachOLinkingContext::configure(HeaderFileType type, Arch arch, OS os, + uint32_t minOSVersion, + bool exportDynamicSymbols) { + _outputMachOType = type; + _arch = arch; + _os = os; + _osMinVersion = minOSVersion; + + // If min OS not specified on command line, use reasonable defaults. + // Note that we only do sensible defaults when emitting something other than + // object and preload. + if (_outputMachOType != llvm::MachO::MH_OBJECT && + _outputMachOType != llvm::MachO::MH_PRELOAD) { + if (minOSVersion == 0) { + switch (_arch) { + case arch_x86_64: + case arch_x86: + parsePackedVersion("10.8", _osMinVersion); + _os = MachOLinkingContext::OS::macOSX; + break; + case arch_armv6: + case arch_armv7: + case arch_armv7s: + case arch_arm64: + parsePackedVersion("7.0", _osMinVersion); + _os = MachOLinkingContext::OS::iOS; + break; + default: + break; + } + } + } + + switch (_outputMachOType) { + case llvm::MachO::MH_EXECUTE: + // If targeting newer OS, use _main + if (minOS("10.8", "6.0")) { + _entrySymbolName = "_main"; + } else { + // If targeting older OS, use start (in crt1.o) + _entrySymbolName = "start"; + } + + // __PAGEZERO defaults to 4GB on 64-bit (except for PP64 which lld does not + // support) and 4KB on 32-bit. + if (is64Bit(_arch)) { + _pageZeroSize = 0x100000000; + } else { + _pageZeroSize = 0x1000; + } + + // Initial base address is __PAGEZERO size. + _baseAddress = _pageZeroSize; + + // Make PIE by default when targetting newer OSs. 
+ switch (os) { + case OS::macOSX: + if (minOSVersion >= 0x000A0700) // MacOSX 10.7 + _pie = true; + break; + case OS::iOS: + if (minOSVersion >= 0x00040300) // iOS 4.3 + _pie = true; + break; + case OS::iOS_simulator: + _pie = true; + break; + case OS::unknown: + break; + } + setGlobalsAreDeadStripRoots(exportDynamicSymbols); + break; + case llvm::MachO::MH_DYLIB: + setGlobalsAreDeadStripRoots(exportDynamicSymbols); + break; + case llvm::MachO::MH_BUNDLE: + break; + case llvm::MachO::MH_OBJECT: + _printRemainingUndefines = false; + _allowRemainingUndefines = true; + default: + break; + } + + // Set default segment page sizes based on arch. + if (arch == arch_arm64) + _pageSize = 4*4096; +} + +uint32_t MachOLinkingContext::getCPUType() const { + return cpuTypeFromArch(_arch); +} + +uint32_t MachOLinkingContext::getCPUSubType() const { + return cpuSubtypeFromArch(_arch); +} + +bool MachOLinkingContext::is64Bit(Arch arch) { + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->arch == arch) { + return (info->cputype & CPU_ARCH_ABI64); + } + } + // unknown archs are not 64-bit. + return false; +} + +bool MachOLinkingContext::isHostEndian(Arch arch) { + assert(arch != arch_unknown); + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->arch == arch) { + return (info->littleEndian == llvm::sys::IsLittleEndianHost); + } + } + llvm_unreachable("Unknown arch type"); +} + +bool MachOLinkingContext::isBigEndian(Arch arch) { + assert(arch != arch_unknown); + for (ArchInfo *info = _s_archInfos; !info->archName.empty(); ++info) { + if (info->arch == arch) { + return ! 
info->littleEndian; + } + } + llvm_unreachable("Unknown arch type"); +} + +bool MachOLinkingContext::is64Bit() const { + return is64Bit(_arch); +} + +bool MachOLinkingContext::outputTypeHasEntry() const { + switch (_outputMachOType) { + case MH_EXECUTE: + case MH_DYLINKER: + case MH_PRELOAD: + return true; + default: + return false; + } +} + +bool MachOLinkingContext::needsStubsPass() const { + switch (_outputMachOType) { + case MH_EXECUTE: + return !_outputMachOTypeStatic; + case MH_DYLIB: + case MH_BUNDLE: + return true; + default: + return false; + } +} + +bool MachOLinkingContext::needsGOTPass() const { + // GOT pass not used in -r mode. + if (_outputMachOType == MH_OBJECT) + return false; + // Only some arches use GOT pass. + switch (_arch) { + case arch_x86_64: + case arch_arm64: + return true; + default: + return false; + } +} + +bool MachOLinkingContext::needsCompactUnwindPass() const { + switch (_outputMachOType) { + case MH_EXECUTE: + case MH_DYLIB: + case MH_BUNDLE: + return archHandler().needsCompactUnwind(); + default: + return false; + } +} + +bool MachOLinkingContext::needsObjCPass() const { + // ObjC pass is only needed if any of the inputs were ObjC. + return _objcConstraint != objc_unknown; +} + +bool MachOLinkingContext::needsShimPass() const { + // Shim pass only used in final executables. + if (_outputMachOType == MH_OBJECT) + return false; + // Only 32-bit arm arches use Shim pass. 
+ switch (_arch) { + case arch_armv6: + case arch_armv7: + case arch_armv7s: + return true; + default: + return false; + } +} + +bool MachOLinkingContext::needsTLVPass() const { + switch (_outputMachOType) { + case MH_BUNDLE: + case MH_EXECUTE: + case MH_DYLIB: + return true; + default: + return false; + } +} + +StringRef MachOLinkingContext::binderSymbolName() const { + return archHandler().stubInfo().binderSymbolName; +} + +bool MachOLinkingContext::minOS(StringRef mac, StringRef iOS) const { + uint32_t parsedVersion; + switch (_os) { + case OS::macOSX: + if (parsePackedVersion(mac, parsedVersion)) + return false; + return _osMinVersion >= parsedVersion; + case OS::iOS: + case OS::iOS_simulator: + if (parsePackedVersion(iOS, parsedVersion)) + return false; + return _osMinVersion >= parsedVersion; + case OS::unknown: + // If we don't know the target, then assume that we don't meet the min OS. + // This matches the ld64 behaviour + return false; + } + llvm_unreachable("invalid OS enum"); +} + +bool MachOLinkingContext::addEntryPointLoadCommand() const { + if ((_outputMachOType == MH_EXECUTE) && !_outputMachOTypeStatic) { + return minOS("10.8", "6.0"); + } + return false; +} + +bool MachOLinkingContext::addUnixThreadLoadCommand() const { + switch (_outputMachOType) { + case MH_EXECUTE: + if (_outputMachOTypeStatic) + return true; + else + return !minOS("10.8", "6.0"); + break; + case MH_DYLINKER: + case MH_PRELOAD: + return true; + default: + return false; + } +} + +bool MachOLinkingContext::pathExists(StringRef path) const { + if (!_testingFileUsage) + return llvm::sys::fs::exists(path.str()); + + // Otherwise, we're in test mode: only files explicitly provided on the + // command-line exist. + std::string key = path.str(); + std::replace(key.begin(), key.end(), '\\', '/'); + return _existingPaths.find(key) != _existingPaths.end(); +} + +bool MachOLinkingContext::fileExists(StringRef path) const { + bool found = pathExists(path); + // Log search misses. 
+ if (!found) + addInputFileNotFound(path); + + // When testing, file is never opened, so logging is done here. + if (_testingFileUsage && found) + addInputFileDependency(path); + + return found; +} + +void MachOLinkingContext::setSysLibRoots(const StringRefVector &paths) { + _syslibRoots = paths; +} + +void MachOLinkingContext::addRpath(StringRef rpath) { + _rpaths.push_back(rpath); +} + +void MachOLinkingContext::addModifiedSearchDir(StringRef libPath, + bool isSystemPath) { + bool addedModifiedPath = false; + + // -syslibroot only applies to absolute paths. + if (libPath.startswith("/")) { + for (auto syslibRoot : _syslibRoots) { + SmallString<256> path(syslibRoot); + llvm::sys::path::append(path, libPath); + if (pathExists(path)) { + _searchDirs.push_back(path.str().copy(_allocator)); + addedModifiedPath = true; + } + } + } + + if (addedModifiedPath) + return; + + // Finally, if only one -syslibroot is given, system paths which aren't in it + // get suppressed. + if (_syslibRoots.size() != 1 || !isSystemPath) { + if (pathExists(libPath)) { + _searchDirs.push_back(libPath); + } + } +} + +void MachOLinkingContext::addFrameworkSearchDir(StringRef fwPath, + bool isSystemPath) { + bool pathAdded = false; + + // -syslibroot only used with to absolute framework search paths. + if (fwPath.startswith("/")) { + for (auto syslibRoot : _syslibRoots) { + SmallString<256> path(syslibRoot); + llvm::sys::path::append(path, fwPath); + if (pathExists(path)) { + _frameworkDirs.push_back(path.str().copy(_allocator)); + pathAdded = true; + } + } + } + // If fwPath found in any -syslibroot, then done. + if (pathAdded) + return; + + // If only one -syslibroot, system paths not in that SDK are suppressed. + if (isSystemPath && (_syslibRoots.size() == 1)) + return; + + // Only use raw fwPath if that directory exists. 
+ if (pathExists(fwPath)) + _frameworkDirs.push_back(fwPath); +} + +llvm::Optional<StringRef> +MachOLinkingContext::searchDirForLibrary(StringRef path, + StringRef libName) const { + SmallString<256> fullPath; + if (libName.endswith(".o")) { + // A request ending in .o is special: just search for the file directly. + fullPath.assign(path); + llvm::sys::path::append(fullPath, libName); + if (fileExists(fullPath)) + return fullPath.str().copy(_allocator); + return llvm::None; + } + + // Search for dynamic library + fullPath.assign(path); + llvm::sys::path::append(fullPath, Twine("lib") + libName + ".dylib"); + if (fileExists(fullPath)) + return fullPath.str().copy(_allocator); + + // If not, try for a static library + fullPath.assign(path); + llvm::sys::path::append(fullPath, Twine("lib") + libName + ".a"); + if (fileExists(fullPath)) + return fullPath.str().copy(_allocator); + + return llvm::None; +} + +llvm::Optional<StringRef> +MachOLinkingContext::searchLibrary(StringRef libName) const { + SmallString<256> path; + for (StringRef dir : searchDirs()) { + llvm::Optional<StringRef> searchDir = searchDirForLibrary(dir, libName); + if (searchDir) + return searchDir; + } + + return llvm::None; +} + +llvm::Optional<StringRef> +MachOLinkingContext::findPathForFramework(StringRef fwName) const{ + SmallString<256> fullPath; + for (StringRef dir : frameworkDirs()) { + fullPath.assign(dir); + llvm::sys::path::append(fullPath, Twine(fwName) + ".framework", fwName); + if (fileExists(fullPath)) + return fullPath.str().copy(_allocator); + } + + return llvm::None; +} + +bool MachOLinkingContext::validateImpl(raw_ostream &diagnostics) { + // TODO: if -arch not specified, look at arch of first .o file. 
+ + if (_currentVersion && _outputMachOType != MH_DYLIB) { + diagnostics << "error: -current_version can only be used with dylibs\n"; + return false; + } + + if (_compatibilityVersion && _outputMachOType != MH_DYLIB) { + diagnostics + << "error: -compatibility_version can only be used with dylibs\n"; + return false; + } + + if (_deadStrippableDylib && _outputMachOType != MH_DYLIB) { + diagnostics + << "error: -mark_dead_strippable_dylib can only be used with dylibs.\n"; + return false; + } + + if (!_bundleLoader.empty() && outputMachOType() != MH_BUNDLE) { + diagnostics + << "error: -bundle_loader can only be used with Mach-O bundles\n"; + return false; + } + + // If -exported_symbols_list used, all exported symbols must be defined. + if (_exportMode == ExportMode::whiteList) { + for (const auto &symbol : _exportedSymbols) + addInitialUndefinedSymbol(symbol.getKey()); + } + + // If -dead_strip, set up initial live symbols. + if (deadStrip()) { + // Entry point is live. + if (outputTypeHasEntry()) + addDeadStripRoot(entrySymbolName()); + // Lazy binding helper is live. + if (needsStubsPass()) + addDeadStripRoot(binderSymbolName()); + // If using -exported_symbols_list, make all exported symbols live. + if (_exportMode == ExportMode::whiteList) { + setGlobalsAreDeadStripRoots(false); + for (const auto &symbol : _exportedSymbols) + addDeadStripRoot(symbol.getKey()); + } + } + + addOutputFileDependency(outputPath()); + + return true; +} + +void MachOLinkingContext::addPasses(PassManager &pm) { + // objc pass should be before layout pass. Otherwise test cases may contain + // no atoms which confuses the layout pass. 
+ if (needsObjCPass()) + mach_o::addObjCPass(pm, *this); + mach_o::addLayoutPass(pm, *this); + if (needsStubsPass()) + mach_o::addStubsPass(pm, *this); + if (needsCompactUnwindPass()) + mach_o::addCompactUnwindPass(pm, *this); + if (needsGOTPass()) + mach_o::addGOTPass(pm, *this); + if (needsTLVPass()) + mach_o::addTLVPass(pm, *this); + if (needsShimPass()) + mach_o::addShimPass(pm, *this); // Shim pass must run after stubs pass. +} + +Writer &MachOLinkingContext::writer() const { + if (!_writer) + _writer = createWriterMachO(*this); + return *_writer; +} + +ErrorOr<std::unique_ptr<MemoryBuffer>> +MachOLinkingContext::getMemoryBuffer(StringRef path) { + addInputFileDependency(path); + + ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = + MemoryBuffer::getFileOrSTDIN(path); + if (std::error_code ec = mbOrErr.getError()) + return ec; + std::unique_ptr<MemoryBuffer> mb = std::move(mbOrErr.get()); + + // If buffer contains a fat file, find required arch in fat buffer + // and switch buffer to point to just that required slice. + uint32_t offset; + uint32_t size; + if (sliceFromFatFile(mb->getMemBufferRef(), offset, size)) + return MemoryBuffer::getFileSlice(path, size, offset); + return std::move(mb); +} + +MachODylibFile* MachOLinkingContext::loadIndirectDylib(StringRef path) { + ErrorOr<std::unique_ptr<MemoryBuffer>> mbOrErr = getMemoryBuffer(path); + if (mbOrErr.getError()) + return nullptr; + + ErrorOr<std::unique_ptr<File>> fileOrErr = + registry().loadFile(std::move(mbOrErr.get())); + if (!fileOrErr) + return nullptr; + std::unique_ptr<File> &file = fileOrErr.get(); + file->parse(); + MachODylibFile *result = reinterpret_cast<MachODylibFile *>(file.get()); + // Node object now owned by _indirectDylibs vector. + _indirectDylibs.push_back(std::move(file)); + return result; +} + +MachODylibFile* MachOLinkingContext::findIndirectDylib(StringRef path) { + // See if already loaded. 
+ auto pos = _pathToDylibMap.find(path); + if (pos != _pathToDylibMap.end()) + return pos->second; + + // Search -L paths if of the form "libXXX.dylib" + std::pair<StringRef, StringRef> split = path.rsplit('/'); + StringRef leafName = split.second; + if (leafName.startswith("lib") && leafName.endswith(".dylib")) { + // FIXME: Need to enhance searchLibrary() to only look for .dylib + auto libPath = searchLibrary(leafName); + if (libPath) + return loadIndirectDylib(libPath.getValue()); + } + + // Try full path with sysroot. + for (StringRef sysPath : _syslibRoots) { + SmallString<256> fullPath; + fullPath.assign(sysPath); + llvm::sys::path::append(fullPath, path); + if (pathExists(fullPath)) + return loadIndirectDylib(fullPath); + } + + // Try full path. + if (pathExists(path)) { + return loadIndirectDylib(path); + } + + return nullptr; +} + +uint32_t MachOLinkingContext::dylibCurrentVersion(StringRef installName) const { + auto pos = _pathToDylibMap.find(installName); + if (pos != _pathToDylibMap.end()) + return pos->second->currentVersion(); + else + return 0x1000; // 1.0 +} + +uint32_t MachOLinkingContext::dylibCompatVersion(StringRef installName) const { + auto pos = _pathToDylibMap.find(installName); + if (pos != _pathToDylibMap.end()) + return pos->second->compatVersion(); + else + return 0x1000; // 1.0 +} + +void MachOLinkingContext::createImplicitFiles( + std::vector<std::unique_ptr<File> > &result) { + // Add indirect dylibs by asking each linked dylib to add its indirects. + // Iterate until no more dylibs get loaded. + size_t dylibCount = 0; + while (dylibCount != _allDylibs.size()) { + dylibCount = _allDylibs.size(); + for (MachODylibFile *dylib : _allDylibs) { + dylib->loadReExportedDylibs([this] (StringRef path) -> MachODylibFile* { + return findIndirectDylib(path); }); + } + } + + // Let writer add output type specific extras. + writer().createImplicitFiles(result); + + // If undefinedMode is != error, add a FlatNamespaceFile instance. 
This will + // provide a SharedLibraryAtom for symbols that aren't defined elsewhere. + if (undefinedMode() != UndefinedMode::error) { + result.emplace_back(new mach_o::FlatNamespaceFile(*this)); + _flatNamespaceFile = result.back().get(); + } +} + +void MachOLinkingContext::registerDylib(MachODylibFile *dylib, + bool upward) const { + std::lock_guard<std::mutex> lock(_dylibsMutex); + _allDylibs.insert(dylib); + _pathToDylibMap[dylib->installName()] = dylib; + // If path is different than install name, register path too. + if (!dylib->path().equals(dylib->installName())) + _pathToDylibMap[dylib->path()] = dylib; + if (upward) + _upwardDylibs.insert(dylib); +} + +bool MachOLinkingContext::isUpwardDylib(StringRef installName) const { + for (MachODylibFile *dylib : _upwardDylibs) { + if (dylib->installName().equals(installName)) + return true; + } + return false; +} + +ArchHandler &MachOLinkingContext::archHandler() const { + if (!_archHandler) + _archHandler = ArchHandler::create(_arch); + return *_archHandler; +} + +void MachOLinkingContext::addSectionAlignment(StringRef seg, StringRef sect, + uint16_t align) { + SectionAlign entry = { seg, sect, align }; + _sectAligns.push_back(entry); +} + +void MachOLinkingContext::addSectCreateSection( + StringRef seg, StringRef sect, + std::unique_ptr<MemoryBuffer> content) { + + if (!_sectCreateFile) { + auto sectCreateFile = llvm::make_unique<mach_o::SectCreateFile>(); + _sectCreateFile = sectCreateFile.get(); + getNodes().push_back(llvm::make_unique<FileNode>(std::move(sectCreateFile))); + } + + assert(_sectCreateFile && "sectcreate file does not exist."); + _sectCreateFile->addSection(seg, sect, std::move(content)); +} + +bool MachOLinkingContext::sectionAligned(StringRef seg, StringRef sect, + uint16_t &align) const { + for (const SectionAlign &entry : _sectAligns) { + if (seg.equals(entry.segmentName) && sect.equals(entry.sectionName)) { + align = entry.align; + return true; + } + } + return false; +} + +void 
MachOLinkingContext::addExportSymbol(StringRef sym) { + // Support old crufty export lists with bogus entries. + if (sym.endswith(".eh") || sym.startswith(".objc_category_name_")) { + llvm::errs() << "warning: ignoring " << sym << " in export list\n"; + return; + } + // Only i386 MacOSX uses old ABI, so don't change those. + if ((_os != OS::macOSX) || (_arch != arch_x86)) { + // ObjC has two differnent ABIs. Be nice and allow one export list work for + // both ABIs by renaming symbols. + if (sym.startswith(".objc_class_name_")) { + std::string abi2className("_OBJC_CLASS_$_"); + abi2className += sym.substr(17); + _exportedSymbols.insert(copy(abi2className)); + std::string abi2metaclassName("_OBJC_METACLASS_$_"); + abi2metaclassName += sym.substr(17); + _exportedSymbols.insert(copy(abi2metaclassName)); + return; + } + } + + // FIXME: Support wildcards. + _exportedSymbols.insert(sym); +} + +bool MachOLinkingContext::exportSymbolNamed(StringRef sym) const { + switch (_exportMode) { + case ExportMode::globals: + llvm_unreachable("exportSymbolNamed() should not be called in this mode"); + break; + case ExportMode::whiteList: + return _exportedSymbols.count(sym); + case ExportMode::blackList: + return !_exportedSymbols.count(sym); + } + llvm_unreachable("_exportMode unknown enum value"); +} + +std::string MachOLinkingContext::demangle(StringRef symbolName) const { + // Only try to demangle symbols if -demangle on command line + if (!demangleSymbols()) + return symbolName; + + // Only try to demangle symbols that look like C++ symbols + if (!symbolName.startswith("__Z")) + return symbolName; + +#if defined(HAVE_CXXABI_H) + SmallString<256> symBuff; + StringRef nullTermSym = Twine(symbolName).toNullTerminatedStringRef(symBuff); + // Mach-O has extra leading underscore that needs to be removed. 
+ const char *cstr = nullTermSym.data() + 1; + int status; + char *demangled = abi::__cxa_demangle(cstr, nullptr, nullptr, &status); + if (demangled) { + std::string result(demangled); + // __cxa_demangle() always uses a malloc'ed buffer to return the result. + free(demangled); + return result; + } +#endif + + return symbolName; +} + +std::error_code MachOLinkingContext::createDependencyFile(StringRef path) { + std::error_code ec; + _dependencyInfo = std::unique_ptr<llvm::raw_fd_ostream>(new + llvm::raw_fd_ostream(path, ec, llvm::sys::fs::F_None)); + if (ec) { + _dependencyInfo.reset(); + return ec; + } + + char linkerVersionOpcode = 0x00; + *_dependencyInfo << linkerVersionOpcode; + *_dependencyInfo << "lld"; // FIXME + *_dependencyInfo << '\0'; + + return std::error_code(); +} + +void MachOLinkingContext::addInputFileDependency(StringRef path) const { + if (!_dependencyInfo) + return; + + char inputFileOpcode = 0x10; + *_dependencyInfo << inputFileOpcode; + *_dependencyInfo << path; + *_dependencyInfo << '\0'; +} + +void MachOLinkingContext::addInputFileNotFound(StringRef path) const { + if (!_dependencyInfo) + return; + + char inputFileOpcode = 0x11; + *_dependencyInfo << inputFileOpcode; + *_dependencyInfo << path; + *_dependencyInfo << '\0'; +} + +void MachOLinkingContext::addOutputFileDependency(StringRef path) const { + if (!_dependencyInfo) + return; + + char outputFileOpcode = 0x40; + *_dependencyInfo << outputFileOpcode; + *_dependencyInfo << path; + *_dependencyInfo << '\0'; +} + +void MachOLinkingContext::appendOrderedSymbol(StringRef symbol, + StringRef filename) { + // To support sorting static functions which may have the same name in + // multiple .o files, _orderFiles maps the symbol name to a vector + // of OrderFileNode each of which can specify a file prefix. 
+ OrderFileNode info; + if (!filename.empty()) + info.fileFilter = copy(filename); + info.order = _orderFileEntries++; + _orderFiles[symbol].push_back(info); +} + +bool +MachOLinkingContext::findOrderOrdinal(const std::vector<OrderFileNode> &nodes, + const DefinedAtom *atom, + unsigned &ordinal) { + const File *objFile = &atom->file(); + assert(objFile); + StringRef objName = objFile->path(); + std::pair<StringRef, StringRef> dirAndLeaf = objName.rsplit('/'); + if (!dirAndLeaf.second.empty()) + objName = dirAndLeaf.second; + for (const OrderFileNode &info : nodes) { + if (info.fileFilter.empty()) { + // Have unprefixed symbol name in order file that matches this atom. + ordinal = info.order; + return true; + } + if (info.fileFilter.equals(objName)) { + // Have prefixed symbol name in order file that matches atom's path. + ordinal = info.order; + return true; + } + } + return false; +} + +bool MachOLinkingContext::customAtomOrderer(const DefinedAtom *left, + const DefinedAtom *right, + bool &leftBeforeRight) const { + // No custom sorting if no order file entries. + if (!_orderFileEntries) + return false; + + // Order files can only order named atoms. + StringRef leftName = left->name(); + StringRef rightName = right->name(); + if (leftName.empty() || rightName.empty()) + return false; + + // If neither is in order file list, no custom sorter. + auto leftPos = _orderFiles.find(leftName); + auto rightPos = _orderFiles.find(rightName); + bool leftIsOrdered = (leftPos != _orderFiles.end()); + bool rightIsOrdered = (rightPos != _orderFiles.end()); + if (!leftIsOrdered && !rightIsOrdered) + return false; + + // There could be multiple symbols with same name but different file prefixes. 
+ unsigned leftOrder; + unsigned rightOrder; + bool foundLeft = + leftIsOrdered && findOrderOrdinal(leftPos->getValue(), left, leftOrder); + bool foundRight = rightIsOrdered && + findOrderOrdinal(rightPos->getValue(), right, rightOrder); + if (!foundLeft && !foundRight) + return false; + + // If only one is in order file list, ordered one goes first. + if (foundLeft != foundRight) + leftBeforeRight = foundLeft; + else + leftBeforeRight = (leftOrder < rightOrder); + + return true; +} + +static bool isLibrary(const std::unique_ptr<Node> &elem) { + if (FileNode *node = dyn_cast<FileNode>(const_cast<Node *>(elem.get()))) { + File *file = node->getFile(); + return isa<SharedLibraryFile>(file) || isa<ArchiveLibraryFile>(file); + } + return false; +} + +// The darwin linker processes input files in two phases. The first phase +// links in all object (.o) files in command line order. The second phase +// links in libraries in command line order. +// In this function we reorder the input files so that all the object files +// comes before any library file. We also make a group for the library files +// so that the Resolver will reiterate over the libraries as long as we find +// new undefines from libraries. +void MachOLinkingContext::finalizeInputFiles() { + std::vector<std::unique_ptr<Node>> &elements = getNodes(); + std::stable_sort(elements.begin(), elements.end(), + [](const std::unique_ptr<Node> &a, + const std::unique_ptr<Node> &b) { + return !isLibrary(a) && isLibrary(b); + }); + size_t numLibs = std::count_if(elements.begin(), elements.end(), isLibrary); + elements.push_back(llvm::make_unique<GroupEnd>(numLibs)); +} + +llvm::Error MachOLinkingContext::handleLoadedFile(File &file) { + auto *machoFile = dyn_cast<MachOFile>(&file); + if (!machoFile) + return llvm::Error(); + + // Check that the arch of the context matches that of the file. + // Also set the arch of the context if it didn't have one. 
+ if (_arch == arch_unknown) { + _arch = machoFile->arch(); + } else if (machoFile->arch() != arch_unknown && machoFile->arch() != _arch) { + // Archs are different. + return llvm::make_error<GenericError>(file.path() + + Twine(" cannot be linked due to incompatible architecture")); + } + + // Check that the OS of the context matches that of the file. + // Also set the OS of the context if it didn't have one. + if (_os == OS::unknown) { + _os = machoFile->OS(); + } else if (machoFile->OS() != OS::unknown && machoFile->OS() != _os) { + // OSes are different. + return llvm::make_error<GenericError>(file.path() + + Twine(" cannot be linked due to incompatible operating systems")); + } + + // Check that if the objc info exists, that it is compatible with the target + // OS. + switch (machoFile->objcConstraint()) { + case objc_unknown: + // The file is not compiled with objc, so skip the checks. + break; + case objc_gc_only: + case objc_supports_gc: + llvm_unreachable("GC support should already have thrown an error"); + case objc_retainReleaseForSimulator: + // The file is built with simulator objc, so make sure that the context + // is also building with simulator support. + if (_os != OS::iOS_simulator) + return llvm::make_error<GenericError>(file.path() + + Twine(" cannot be linked. It contains ObjC built for the simulator" + " while we are linking a non-simulator target")); + assert((_objcConstraint == objc_unknown || + _objcConstraint == objc_retainReleaseForSimulator) && + "Must be linking with retain/release for the simulator"); + _objcConstraint = objc_retainReleaseForSimulator; + break; + case objc_retainRelease: + // The file is built without simulator objc, so make sure that the + // context is also building without simulator support. + if (_os == OS::iOS_simulator) + return llvm::make_error<GenericError>(file.path() + + Twine(" cannot be linked. 
It contains ObjC built for a non-simulator" + " target while we are linking a simulator target")); + assert((_objcConstraint == objc_unknown || + _objcConstraint == objc_retainRelease) && + "Must be linking with retain/release for a non-simulator target"); + _objcConstraint = objc_retainRelease; + break; + } + + // Check that the swift version of the context matches that of the file. + // Also set the swift version of the context if it didn't have one. + if (!_swiftVersion) { + _swiftVersion = machoFile->swiftVersion(); + } else if (machoFile->swiftVersion() && + machoFile->swiftVersion() != _swiftVersion) { + // Swift versions are different. + return llvm::make_error<GenericError>("different swift versions"); + } + + return llvm::Error(); +} + +} // end namespace lld diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFile.h b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFile.h new file mode 100644 index 00000000000..92a21f7ef83 --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFile.h @@ -0,0 +1,344 @@ +//===- lib/ReaderWriter/MachO/MachONormalizedFile.h -----------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +/// +/// \file These data structures comprise the "normalized" view of +/// mach-o object files. The normalized view is an in-memory only data structure +/// which is always in native endianness and pointer size. +/// +/// The normalized view easily converts to and from YAML using YAML I/O. +/// +/// The normalized view converts to and from binary mach-o object files using +/// the writeBinary() and readBinary() functions. +/// +/// The normalized view converts to and from lld::Atoms using the +/// normalizedToAtoms() and normalizedFromAtoms(). 
+/// +/// Overall, the conversion paths available look like: +/// +/// +---------------+ +/// | binary mach-o | +/// +---------------+ +/// ^ +/// | +/// v +/// +------------+ +------+ +/// | normalized | <-> | yaml | +/// +------------+ +------+ +/// ^ +/// | +/// v +/// +-------+ +/// | Atoms | +/// +-------+ +/// + +#ifndef LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H +#define LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H + +#include "lld/Core/Error.h" +#include "lld/Core/LLVM.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MachO.h" +#include "llvm/Support/YAMLTraits.h" + +using llvm::BumpPtrAllocator; +using llvm::yaml::Hex64; +using llvm::yaml::Hex32; +using llvm::yaml::Hex16; +using llvm::yaml::Hex8; +using llvm::yaml::SequenceTraits; +using llvm::MachO::HeaderFileType; +using llvm::MachO::BindType; +using llvm::MachO::RebaseType; +using llvm::MachO::NListType; +using llvm::MachO::RelocationInfoType; +using llvm::MachO::SectionType; +using llvm::MachO::LoadCommandType; +using llvm::MachO::ExportSymbolKind; +using llvm::MachO::DataRegionType; + +namespace lld { +namespace mach_o { +namespace normalized { + + +/// The real mach-o relocation record is 8-bytes on disk and is +/// encoded in one of two different bit-field patterns. This +/// normalized form has the union of all possible fields. +/// A default-constructed Relocation is a non-scattered, non-pc-relative, +/// non-extern GENERIC_RELOC_VANILLA with offset, length, value and symbol all 0. +struct Relocation { + Relocation() : offset(0), scattered(false), + type(llvm::MachO::GENERIC_RELOC_VANILLA), + length(0), pcRel(false), isExtern(false), value(0), + symbol(0) { } + + Hex32 offset; + bool scattered; + RelocationInfoType type; + uint8_t length; + bool pcRel; + bool isExtern; + Hex32 value; + uint32_t symbol; +}; + +/// A typedef so that YAML I/O can treat this vector as a sequence. 
+typedef std::vector<Relocation> Relocations; + +/// A typedef so that YAML I/O can process the raw bytes in a section. +typedef std::vector<Hex8> ContentBytes; + +/// A typedef so that YAML I/O can treat indirect symbols as a flow sequence. +typedef std::vector<uint32_t> IndirectSymbols; + +/// A typedef so that YAML I/O can encode/decode section attributes. +LLVM_YAML_STRONG_TYPEDEF(uint32_t, SectionAttr) + +/// A typedef so that YAML I/O can encode/decode section alignment. +LLVM_YAML_STRONG_TYPEDEF(uint16_t, SectionAlignment) + +/// Mach-O has a 32-bit and 64-bit section record. This normalized form +/// can support either kind. +/// A default-constructed Section is S_REGULAR with attributes 0, alignment 1 +/// and address 0. +struct Section { + Section() : type(llvm::MachO::S_REGULAR), + attributes(0), alignment(1), address(0) { } + + StringRef segmentName; + StringRef sectionName; + SectionType type; + SectionAttr attributes; + SectionAlignment alignment; + Hex64 address; + ArrayRef<uint8_t> content; + Relocations relocations; + IndirectSymbols indirectSymbols; + +#ifndef NDEBUG + // Debug-only helpers; compiled only when NDEBUG is not defined. + // NOTE(review): as a member, this operator<< takes the stream as its + // right-hand operand (section << OS) - confirm that is intended. + raw_ostream& operator<<(raw_ostream &OS) const { + dump(OS); + return OS; + } + + void dump(raw_ostream &OS = llvm::dbgs()) const; +#endif +}; + + +/// A typedef so that YAML I/O can encode/decode the scope bits of an nlist. +LLVM_YAML_STRONG_TYPEDEF(uint8_t, SymbolScope) + +/// A typedef so that YAML I/O can encode/decode the desc bits of an nlist. +LLVM_YAML_STRONG_TYPEDEF(uint16_t, SymbolDesc) + +/// Mach-O has a 32-bit and 64-bit symbol table entry (nlist), and the symbol +/// type and scope are mixed in the same n_type field. This normalized form +/// works for any pointer size and separates out the type and scope. +/// A default-constructed Symbol is N_UNDF with scope, sect, desc and value 0. +struct Symbol { + Symbol() : type(llvm::MachO::N_UNDF), scope(0), sect(0), desc(0), value(0) { } + + StringRef name; + NListType type; + SymbolScope scope; + uint8_t sect; + SymbolDesc desc; + Hex64 value; +}; + +/// Check whether the given section type indicates a zero-filled section. 
+// FIXME: Utility functions of this kind should probably be moved into +// llvm/Support. +inline bool isZeroFillSection(SectionType T) { + return (T == llvm::MachO::S_ZEROFILL || + T == llvm::MachO::S_THREAD_LOCAL_ZEROFILL); +} + +/// A typedef so that YAML I/O can (de/en)code the protection bits of a segment. +LLVM_YAML_STRONG_TYPEDEF(uint32_t, VMProtect) + +/// A typedef to hold verions X.Y.X packed into 32-bit xxxx.yy.zz +LLVM_YAML_STRONG_TYPEDEF(uint32_t, PackedVersion) + +/// Segments are only used in normalized final linked images (not in relocatable +/// object files). They specify how a range of the file is loaded. +struct Segment { + StringRef name; + Hex64 address; + Hex64 size; + VMProtect init_access; + VMProtect max_access; +}; + +/// Only used in normalized final linked images to specify on which dylibs +/// it depends. +struct DependentDylib { + StringRef path; + LoadCommandType kind; + PackedVersion compatVersion; + PackedVersion currentVersion; +}; + +/// A normalized rebasing entry. Only used in normalized final linked images. +struct RebaseLocation { + Hex32 segOffset; + uint8_t segIndex; + RebaseType kind; +}; + +/// A normalized binding entry. Only used in normalized final linked images. +struct BindLocation { + Hex32 segOffset; + uint8_t segIndex; + BindType kind; + bool canBeNull; + int ordinal; + StringRef symbolName; + Hex64 addend; +}; + +/// A typedef so that YAML I/O can encode/decode export flags. +LLVM_YAML_STRONG_TYPEDEF(uint32_t, ExportFlags) + +/// A normalized export entry. Only used in normalized final linked images. +struct Export { + StringRef name; + Hex64 offset; + ExportSymbolKind kind; + ExportFlags flags; + Hex32 otherOffset; + StringRef otherName; +}; + +/// A normalized data-in-code entry. +struct DataInCode { + Hex32 offset; + Hex16 length; + DataRegionType kind; +}; + + +/// A typedef so that YAML I/O can encode/decode mach_header.flags. 
LLVM_YAML_STRONG_TYPEDEF(uint32_t, FileFlags)

/// The in-memory normalized view of one mach-o file.  It is the hub of the
/// conversion paths in this directory: it can be read from / written to
/// binary mach-o and yaml, and converted to / from atoms.
///
/// Members marked "final linked images only" are not used for relocatable
/// object files.
struct NormalizedFile {
  MachOLinkingContext::Arch   arch = MachOLinkingContext::arch_unknown;
  HeaderFileType              fileType = llvm::MachO::MH_OBJECT;
  FileFlags                   flags = 0;
  std::vector<Segment>        segments; // Not used in object files.
  std::vector<Section>        sections;

  // Symbols sorted by kind.
  std::vector<Symbol>         localSymbols;
  std::vector<Symbol>         globalSymbols;
  std::vector<Symbol>         undefinedSymbols;

  // Maps to load commands with no LINKEDIT content (final linked images only).
  std::vector<DependentDylib> dependentDylibs;
  StringRef                   installName;        // dylibs only
  PackedVersion               compatVersion = 0;  // dylibs only
  PackedVersion               currentVersion = 0; // dylibs only
  bool                        hasUUID = false;
  bool                        hasMinVersionLoadCommand = false;
  bool                        generateDataInCodeLoadCommand = false;
  std::vector<StringRef>      rpaths;
  Hex64                       entryAddress = 0;
  Hex64                       stackSize = 0;
  MachOLinkingContext::OS     os = MachOLinkingContext::OS::unknown;
  Hex64                       sourceVersion = 0;
  // NOTE: the member name is misspelled ("verson"); it is part of the public
  // interface of this struct, so it is kept as is.
  PackedVersion               minOSverson = 0;
  PackedVersion               sdkVersion = 0;
  // Which LC_VERSION_MIN_* load command kind applies; 0 means none recorded.
  LoadCommandType             minOSVersionKind = (LoadCommandType)0;

  // Maps to load commands with LINKEDIT content (final linked images only).
  Hex32                       pageSize = 0;
  std::vector<RebaseLocation> rebasingInfo;
  std::vector<BindLocation>   bindingInfo;
  std::vector<BindLocation>   weakBindingInfo;
  std::vector<BindLocation>   lazyBindingInfo;
  std::vector<Export>         exportInfo;
  std::vector<uint8_t>        functionStarts;
  std::vector<DataInCode>     dataInCode;

  // TODO:
  // code-signature
  // split-seg-info
  // function-starts

  // For any allocations in this struct which need to be owned by this struct.
  // (StringRef members above do not own their characters; strings that have
  // no other owner can be copied into this allocator.)
  BumpPtrAllocator            ownedAllocations;
};

/// Tests if a file is a non-fat mach-o object file.
/// On success 'arch' is set to the architecture found in the file's header.
bool isThinObjectFile(StringRef path, MachOLinkingContext::Arch &arch);

/// If the buffer is a fat file with the requested arch, then this function
/// returns true with 'offset' and 'size' set to location of the arch slice
/// within the buffer.  Otherwise returns false.
bool sliceFromFatFile(MemoryBufferRef mb, MachOLinkingContext::Arch arch,
                      uint32_t &offset, uint32_t &size);

/// Reads a mach-o file and produces an in-memory normalized view.
/// Fails if the buffer is not a mach-o or does not match 'arch'.
llvm::Expected<std::unique_ptr<NormalizedFile>>
readBinary(std::unique_ptr<MemoryBuffer> &mb,
           const MachOLinkingContext::Arch arch);

/// Takes in-memory normalized view and writes a mach-o object file.
llvm::Error writeBinary(const NormalizedFile &file, StringRef path);

/// Returns the size of the mach_header plus load commands for 'file'.
// NOTE(review): description inferred from the name and from the matching
// comment on MachOFileLayout in the binary writer; confirm against the
// definition.
size_t headerAndLoadCommandsSize(const NormalizedFile &file);


/// Parses a yaml encoded mach-o file to produce an in-memory normalized view.
llvm::Expected<std::unique_ptr<NormalizedFile>>
readYaml(std::unique_ptr<MemoryBuffer> &mb);

/// Writes a yaml encoded mach-o file given an in-memory normalized view.
std::error_code writeYaml(const NormalizedFile &file, raw_ostream &out);

/// Converts a normalized object file into atoms stored in 'file'.
// NOTE(review): presumably 'copyRefs' selects whether strings/content are
// copied rather than referenced from the normalized file -- confirm.
llvm::Error
normalizedObjectToAtoms(MachOFile *file,
                        const NormalizedFile &normalizedFile,
                        bool copyRefs);

/// Converts a normalized dylib into atoms stored in 'file'.
llvm::Error
normalizedDylibToAtoms(MachODylibFile *file,
                       const NormalizedFile &normalizedFile,
                       bool copyRefs);

/// Takes in-memory normalized dylib or object and parses it into lld::File
llvm::Expected<std::unique_ptr<lld::File>>
normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path,
                  bool copyRefs);

/// Takes atoms and generates a normalized mach-o view.
llvm::Expected<std::unique_ptr<NormalizedFile>>
normalizedFromAtoms(const lld::File &atomFile, const MachOLinkingContext &ctxt);


} // namespace normalized

/// Class for interfacing mach-o yaml files into generic yaml parsing.
/// The handler remembers the architecture it was created for; handledDocTag()
/// is the hook called by the generic yaml reader.
// NOTE(review): presumably handledDocTag() returns true when it recognized the
// document tag and set 'file' -- confirm against the implementation.
class MachOYamlIOTaggedDocumentHandler : public YamlIOTaggedDocumentHandler {
public:
  MachOYamlIOTaggedDocumentHandler(MachOLinkingContext::Arch arch)
    : _arch(arch) { }
  bool handledDocTag(llvm::yaml::IO &io, const lld::File *&file) const override;
private:
  // Architecture given at construction time.
  const MachOLinkingContext::Arch _arch;
};

} // namespace mach_o
} // namespace lld

#endif // LLD_READER_WRITER_MACHO_NORMALIZE_FILE_H
diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp
new file mode 100644
index 00000000000..a17de5be174
--- /dev/null
+++ b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp
@@ -0,0 +1,586 @@
//===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryReader.cpp ---------===//
//
//                             The LLVM Linker
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

///
/// \file For mach-o object files, this implementation converts from
/// mach-o on-disk binary format to in-memory normalized mach-o.
+/// +/// +---------------+ +/// | binary mach-o | +/// +---------------+ +/// | +/// | +/// v +/// +------------+ +/// | normalized | +/// +------------+ + +#include "MachONormalizedFile.h" +#include "ArchHandler.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "lld/Core/Error.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/SharedLibraryFile.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Object/MachO.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/MachO.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include <functional> +#include <system_error> + +using namespace llvm::MachO; +using llvm::object::ExportEntry; +using llvm::object::MachOObjectFile; + +namespace lld { +namespace mach_o { +namespace normalized { + +// Utility to call a lambda expression on each load command. 
+static llvm::Error forEachLoadCommand( + StringRef lcRange, unsigned lcCount, bool isBig, bool is64, + std::function<bool(uint32_t cmd, uint32_t size, const char *lc)> func) { + const char* p = lcRange.begin(); + for (unsigned i=0; i < lcCount; ++i) { + const load_command *lc = reinterpret_cast<const load_command*>(p); + load_command lcCopy; + const load_command *slc = lc; + if (isBig != llvm::sys::IsBigEndianHost) { + memcpy(&lcCopy, lc, sizeof(load_command)); + swapStruct(lcCopy); + slc = &lcCopy; + } + if ( (p + slc->cmdsize) > lcRange.end() ) + return llvm::make_error<GenericError>("Load command exceeds range"); + + if (func(slc->cmd, slc->cmdsize, p)) + return llvm::Error(); + + p += slc->cmdsize; + } + + return llvm::Error(); +} + +static std::error_code appendRelocations(Relocations &relocs, StringRef buffer, + bool bigEndian, + uint32_t reloff, uint32_t nreloc) { + if ((reloff + nreloc*8) > buffer.size()) + return make_error_code(llvm::errc::executable_format_error); + const any_relocation_info* relocsArray = + reinterpret_cast<const any_relocation_info*>(buffer.begin()+reloff); + + for(uint32_t i=0; i < nreloc; ++i) { + relocs.push_back(unpackRelocation(relocsArray[i], bigEndian)); + } + return std::error_code(); +} + +static std::error_code +appendIndirectSymbols(IndirectSymbols &isyms, StringRef buffer, bool isBig, + uint32_t istOffset, uint32_t istCount, + uint32_t startIndex, uint32_t count) { + if ((istOffset + istCount*4) > buffer.size()) + return make_error_code(llvm::errc::executable_format_error); + if (startIndex+count > istCount) + return make_error_code(llvm::errc::executable_format_error); + const uint8_t *indirectSymbolArray = (const uint8_t *)buffer.data(); + + for(uint32_t i=0; i < count; ++i) { + isyms.push_back(read32( + indirectSymbolArray + (startIndex + i) * sizeof(uint32_t), isBig)); + } + return std::error_code(); +} + + +template <typename T> static T readBigEndian(T t) { + if (llvm::sys::IsLittleEndianHost) + 
llvm::sys::swapByteOrder(t); + return t; +} + + +static bool isMachOHeader(const mach_header *mh, bool &is64, bool &isBig) { + switch (read32(&mh->magic, false)) { + case llvm::MachO::MH_MAGIC: + is64 = false; + isBig = false; + return true; + case llvm::MachO::MH_MAGIC_64: + is64 = true; + isBig = false; + return true; + case llvm::MachO::MH_CIGAM: + is64 = false; + isBig = true; + return true; + case llvm::MachO::MH_CIGAM_64: + is64 = true; + isBig = true; + return true; + default: + return false; + } +} + + +bool isThinObjectFile(StringRef path, MachOLinkingContext::Arch &arch) { + // Try opening and mapping file at path. + ErrorOr<std::unique_ptr<MemoryBuffer>> b = MemoryBuffer::getFileOrSTDIN(path); + if (b.getError()) + return false; + + // If file length < 32 it is too small to be mach-o object file. + StringRef fileBuffer = b->get()->getBuffer(); + if (fileBuffer.size() < 32) + return false; + + // If file buffer does not start with MH_MAGIC (and variants), not obj file. + const mach_header *mh = reinterpret_cast<const mach_header *>( + fileBuffer.begin()); + bool is64, isBig; + if (!isMachOHeader(mh, is64, isBig)) + return false; + + // If not MH_OBJECT, not object file. + if (read32(&mh->filetype, isBig) != MH_OBJECT) + return false; + + // Lookup up arch from cpu/subtype pair. 
+ arch = MachOLinkingContext::archFromCpuType( + read32(&mh->cputype, isBig), + read32(&mh->cpusubtype, isBig)); + return true; +} + +bool sliceFromFatFile(MemoryBufferRef mb, MachOLinkingContext::Arch arch, + uint32_t &offset, uint32_t &size) { + const char *start = mb.getBufferStart(); + const llvm::MachO::fat_header *fh = + reinterpret_cast<const llvm::MachO::fat_header *>(start); + if (readBigEndian(fh->magic) != llvm::MachO::FAT_MAGIC) + return false; + uint32_t nfat_arch = readBigEndian(fh->nfat_arch); + const fat_arch *fstart = + reinterpret_cast<const fat_arch *>(start + sizeof(fat_header)); + const fat_arch *fend = + reinterpret_cast<const fat_arch *>(start + sizeof(fat_header) + + sizeof(fat_arch) * nfat_arch); + const uint32_t reqCpuType = MachOLinkingContext::cpuTypeFromArch(arch); + const uint32_t reqCpuSubtype = MachOLinkingContext::cpuSubtypeFromArch(arch); + for (const fat_arch *fa = fstart; fa < fend; ++fa) { + if ((readBigEndian(fa->cputype) == reqCpuType) && + (readBigEndian(fa->cpusubtype) == reqCpuSubtype)) { + offset = readBigEndian(fa->offset); + size = readBigEndian(fa->size); + if ((offset + size) > mb.getBufferSize()) + return false; + return true; + } + } + return false; +} + +/// Reads a mach-o file and produces an in-memory normalized view. +llvm::Expected<std::unique_ptr<NormalizedFile>> +readBinary(std::unique_ptr<MemoryBuffer> &mb, + const MachOLinkingContext::Arch arch) { + // Make empty NormalizedFile. + std::unique_ptr<NormalizedFile> f(new NormalizedFile()); + + const char *start = mb->getBufferStart(); + size_t objSize = mb->getBufferSize(); + const mach_header *mh = reinterpret_cast<const mach_header *>(start); + + uint32_t sliceOffset; + uint32_t sliceSize; + if (sliceFromFatFile(mb->getMemBufferRef(), arch, sliceOffset, sliceSize)) { + start = &start[sliceOffset]; + objSize = sliceSize; + mh = reinterpret_cast<const mach_header *>(start); + } + + // Determine endianness and pointer size for mach-o file. 
+ bool is64, isBig; + if (!isMachOHeader(mh, is64, isBig)) + return llvm::make_error<GenericError>("File is not a mach-o"); + + // Endian swap header, if needed. + mach_header headerCopy; + const mach_header *smh = mh; + if (isBig != llvm::sys::IsBigEndianHost) { + memcpy(&headerCopy, mh, sizeof(mach_header)); + swapStruct(headerCopy); + smh = &headerCopy; + } + + // Validate head and load commands fit in buffer. + const uint32_t lcCount = smh->ncmds; + const char *lcStart = + start + (is64 ? sizeof(mach_header_64) : sizeof(mach_header)); + StringRef lcRange(lcStart, smh->sizeofcmds); + if (lcRange.end() > (start + objSize)) + return llvm::make_error<GenericError>("Load commands exceed file size"); + + // Get architecture from mach_header. + f->arch = MachOLinkingContext::archFromCpuType(smh->cputype, smh->cpusubtype); + if (f->arch != arch) { + return llvm::make_error<GenericError>( + Twine("file is wrong architecture. Expected " + "(" + MachOLinkingContext::nameFromArch(arch) + + ") found (" + + MachOLinkingContext::nameFromArch(f->arch) + + ")" )); + } + // Copy file type and flags + f->fileType = HeaderFileType(smh->filetype); + f->flags = smh->flags; + + + // Pre-scan load commands looking for indirect symbol table. + uint32_t indirectSymbolTableOffset = 0; + uint32_t indirectSymbolTableCount = 0; + auto ec = forEachLoadCommand(lcRange, lcCount, isBig, is64, + [&](uint32_t cmd, uint32_t size, + const char *lc) -> bool { + if (cmd == LC_DYSYMTAB) { + const dysymtab_command *d = reinterpret_cast<const dysymtab_command*>(lc); + indirectSymbolTableOffset = read32(&d->indirectsymoff, isBig); + indirectSymbolTableCount = read32(&d->nindirectsyms, isBig); + return true; + } + return false; + }); + if (ec) + return std::move(ec); + + // Walk load commands looking for segments/sections and the symbol table. 
+ const data_in_code_entry *dataInCode = nullptr; + const dyld_info_command *dyldInfo = nullptr; + uint32_t dataInCodeSize = 0; + ec = forEachLoadCommand(lcRange, lcCount, isBig, is64, + [&] (uint32_t cmd, uint32_t size, const char* lc) -> bool { + switch(cmd) { + case LC_SEGMENT_64: + if (is64) { + const segment_command_64 *seg = + reinterpret_cast<const segment_command_64*>(lc); + const unsigned sectionCount = read32(&seg->nsects, isBig); + const section_64 *sects = reinterpret_cast<const section_64*> + (lc + sizeof(segment_command_64)); + const unsigned lcSize = sizeof(segment_command_64) + + sectionCount*sizeof(section_64); + // Verify sections don't extend beyond end of segment load command. + if (lcSize > size) + return true; + for (unsigned i=0; i < sectionCount; ++i) { + const section_64 *sect = §s[i]; + Section section; + section.segmentName = getString16(sect->segname); + section.sectionName = getString16(sect->sectname); + section.type = (SectionType)(read32(§->flags, isBig) & + SECTION_TYPE); + section.attributes = read32(§->flags, isBig) & SECTION_ATTRIBUTES; + section.alignment = 1 << read32(§->align, isBig); + section.address = read64(§->addr, isBig); + const uint8_t *content = + (const uint8_t *)start + read32(§->offset, isBig); + size_t contentSize = read64(§->size, isBig); + // Note: this assign() is copying the content bytes. Ideally, + // we can use a custom allocator for vector to avoid the copy. 
+ section.content = llvm::makeArrayRef(content, contentSize); + appendRelocations(section.relocations, mb->getBuffer(), isBig, + read32(§->reloff, isBig), + read32(§->nreloc, isBig)); + if (section.type == S_NON_LAZY_SYMBOL_POINTERS) { + appendIndirectSymbols(section.indirectSymbols, mb->getBuffer(), + isBig, + indirectSymbolTableOffset, + indirectSymbolTableCount, + read32(§->reserved1, isBig), + contentSize/4); + } + f->sections.push_back(section); + } + } + break; + case LC_SEGMENT: + if (!is64) { + const segment_command *seg = + reinterpret_cast<const segment_command*>(lc); + const unsigned sectionCount = read32(&seg->nsects, isBig); + const section *sects = reinterpret_cast<const section*> + (lc + sizeof(segment_command)); + const unsigned lcSize = sizeof(segment_command) + + sectionCount*sizeof(section); + // Verify sections don't extend beyond end of segment load command. + if (lcSize > size) + return true; + for (unsigned i=0; i < sectionCount; ++i) { + const section *sect = §s[i]; + Section section; + section.segmentName = getString16(sect->segname); + section.sectionName = getString16(sect->sectname); + section.type = (SectionType)(read32(§->flags, isBig) & + SECTION_TYPE); + section.attributes = + read32((const uint8_t *)§->flags, isBig) & SECTION_ATTRIBUTES; + section.alignment = 1 << read32(§->align, isBig); + section.address = read32(§->addr, isBig); + const uint8_t *content = + (const uint8_t *)start + read32(§->offset, isBig); + size_t contentSize = read32(§->size, isBig); + // Note: this assign() is copying the content bytes. Ideally, + // we can use a custom allocator for vector to avoid the copy. 
+ section.content = llvm::makeArrayRef(content, contentSize); + appendRelocations(section.relocations, mb->getBuffer(), isBig, + read32(§->reloff, isBig), + read32(§->nreloc, isBig)); + if (section.type == S_NON_LAZY_SYMBOL_POINTERS) { + appendIndirectSymbols( + section.indirectSymbols, mb->getBuffer(), isBig, + indirectSymbolTableOffset, indirectSymbolTableCount, + read32(§->reserved1, isBig), contentSize / 4); + } + f->sections.push_back(section); + } + } + break; + case LC_SYMTAB: { + const symtab_command *st = reinterpret_cast<const symtab_command*>(lc); + const char *strings = start + read32(&st->stroff, isBig); + const uint32_t strSize = read32(&st->strsize, isBig); + // Validate string pool and symbol table all in buffer. + if (read32((const uint8_t *)&st->stroff, isBig) + + read32((const uint8_t *)&st->strsize, isBig) > + objSize) + return true; + if (is64) { + const uint32_t symOffset = read32(&st->symoff, isBig); + const uint32_t symCount = read32(&st->nsyms, isBig); + if ( symOffset+(symCount*sizeof(nlist_64)) > objSize) + return true; + const nlist_64 *symbols = + reinterpret_cast<const nlist_64 *>(start + symOffset); + // Convert each nlist_64 to a lld::mach_o::normalized::Symbol. 
+ for(uint32_t i=0; i < symCount; ++i) { + nlist_64 tempSym; + memcpy(&tempSym, &symbols[i], sizeof(nlist_64)); + const nlist_64 *sin = &tempSym; + if (isBig != llvm::sys::IsBigEndianHost) + swapStruct(tempSym); + Symbol sout; + if (sin->n_strx > strSize) + return true; + sout.name = &strings[sin->n_strx]; + sout.type = (NListType)(sin->n_type & N_TYPE); + sout.scope = (sin->n_type & (N_PEXT|N_EXT)); + sout.sect = sin->n_sect; + sout.desc = sin->n_desc; + sout.value = sin->n_value; + if (sout.type == N_UNDF) + f->undefinedSymbols.push_back(sout); + else if (sin->n_type & N_EXT) + f->globalSymbols.push_back(sout); + else + f->localSymbols.push_back(sout); + } + } else { + const uint32_t symOffset = read32(&st->symoff, isBig); + const uint32_t symCount = read32(&st->nsyms, isBig); + if ( symOffset+(symCount*sizeof(nlist)) > objSize) + return true; + const nlist *symbols = + reinterpret_cast<const nlist *>(start + symOffset); + // Convert each nlist to a lld::mach_o::normalized::Symbol. + for(uint32_t i=0; i < symCount; ++i) { + const nlist *sin = &symbols[i]; + nlist tempSym; + if (isBig != llvm::sys::IsBigEndianHost) { + tempSym = *sin; swapStruct(tempSym); sin = &tempSym; + } + Symbol sout; + if (sin->n_strx > strSize) + return true; + sout.name = &strings[sin->n_strx]; + sout.type = (NListType)(sin->n_type & N_TYPE); + sout.scope = (sin->n_type & (N_PEXT|N_EXT)); + sout.sect = sin->n_sect; + sout.desc = sin->n_desc; + sout.value = sin->n_value; + if (sout.type == N_UNDF) + f->undefinedSymbols.push_back(sout); + else if (sout.scope == (SymbolScope)N_EXT) + f->globalSymbols.push_back(sout); + else + f->localSymbols.push_back(sout); + } + } + } + break; + case LC_ID_DYLIB: { + const dylib_command *dl = reinterpret_cast<const dylib_command*>(lc); + f->installName = lc + read32(&dl->dylib.name, isBig); + f->currentVersion = read32(&dl->dylib.current_version, isBig); + f->compatVersion = read32(&dl->dylib.compatibility_version, isBig); + } + break; + case 
LC_DATA_IN_CODE: { + const linkedit_data_command *ldc = + reinterpret_cast<const linkedit_data_command*>(lc); + dataInCode = reinterpret_cast<const data_in_code_entry *>( + start + read32(&ldc->dataoff, isBig)); + dataInCodeSize = read32(&ldc->datasize, isBig); + } + break; + case LC_LOAD_DYLIB: + case LC_LOAD_WEAK_DYLIB: + case LC_REEXPORT_DYLIB: + case LC_LOAD_UPWARD_DYLIB: { + const dylib_command *dl = reinterpret_cast<const dylib_command*>(lc); + DependentDylib entry; + entry.path = lc + read32(&dl->dylib.name, isBig); + entry.kind = LoadCommandType(cmd); + entry.compatVersion = read32(&dl->dylib.compatibility_version, isBig); + entry.currentVersion = read32(&dl->dylib.current_version, isBig); + f->dependentDylibs.push_back(entry); + } + break; + case LC_RPATH: { + const rpath_command *rpc = reinterpret_cast<const rpath_command *>(lc); + f->rpaths.push_back(lc + read32(&rpc->path, isBig)); + } + break; + case LC_DYLD_INFO: + case LC_DYLD_INFO_ONLY: + dyldInfo = reinterpret_cast<const dyld_info_command*>(lc); + break; + case LC_VERSION_MIN_MACOSX: + case LC_VERSION_MIN_IPHONEOS: + case LC_VERSION_MIN_WATCHOS: + case LC_VERSION_MIN_TVOS: + // If we are emitting an object file, then we may take the load command + // kind from these commands and pass it on to the output + // file. + f->minOSVersionKind = (LoadCommandType)cmd; + break; + } + return false; + }); + if (ec) + return std::move(ec); + + if (dataInCode) { + // Convert on-disk data_in_code_entry array to DataInCode vector. + for (unsigned i=0; i < dataInCodeSize/sizeof(data_in_code_entry); ++i) { + DataInCode entry; + entry.offset = read32(&dataInCode[i].offset, isBig); + entry.length = read16(&dataInCode[i].length, isBig); + entry.kind = + (DataRegionType)read16((const uint8_t *)&dataInCode[i].kind, isBig); + f->dataInCode.push_back(entry); + } + } + + if (dyldInfo) { + // If any exports, extract and add to normalized exportInfo vector. 
+ if (dyldInfo->export_size) { + const uint8_t *trieStart = reinterpret_cast<const uint8_t*>(start + + dyldInfo->export_off); + ArrayRef<uint8_t> trie(trieStart, dyldInfo->export_size); + for (const ExportEntry &trieExport : MachOObjectFile::exports(trie)) { + Export normExport; + normExport.name = trieExport.name().copy(f->ownedAllocations); + normExport.offset = trieExport.address(); + normExport.kind = ExportSymbolKind(trieExport.flags() & EXPORT_SYMBOL_FLAGS_KIND_MASK); + normExport.flags = trieExport.flags() & ~EXPORT_SYMBOL_FLAGS_KIND_MASK; + normExport.otherOffset = trieExport.other(); + if (!trieExport.otherName().empty()) + normExport.otherName = trieExport.otherName().copy(f->ownedAllocations); + f->exportInfo.push_back(normExport); + } + } + } + + return std::move(f); +} + +class MachOObjectReader : public Reader { +public: + MachOObjectReader(MachOLinkingContext &ctx) : _ctx(ctx) {} + + bool canParse(file_magic magic, MemoryBufferRef mb) const override { + return (magic == llvm::sys::fs::file_magic::macho_object && + mb.getBufferSize() > 32); + } + + ErrorOr<std::unique_ptr<File>> + loadFile(std::unique_ptr<MemoryBuffer> mb, + const Registry ®istry) const override { + std::unique_ptr<File> ret = + llvm::make_unique<MachOFile>(std::move(mb), &_ctx); + return std::move(ret); + } + +private: + MachOLinkingContext &_ctx; +}; + +class MachODylibReader : public Reader { +public: + MachODylibReader(MachOLinkingContext &ctx) : _ctx(ctx) {} + + bool canParse(file_magic magic, MemoryBufferRef mb) const override { + switch (magic) { + case llvm::sys::fs::file_magic::macho_dynamically_linked_shared_lib: + case llvm::sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub: + return mb.getBufferSize() > 32; + default: + return false; + } + } + + ErrorOr<std::unique_ptr<File>> + loadFile(std::unique_ptr<MemoryBuffer> mb, + const Registry ®istry) const override { + std::unique_ptr<File> ret = + llvm::make_unique<MachODylibFile>(std::move(mb), &_ctx); + return 
std::move(ret); + } + +private: + MachOLinkingContext &_ctx; +}; + +} // namespace normalized +} // namespace mach_o + +void Registry::addSupportMachOObjects(MachOLinkingContext &ctx) { + MachOLinkingContext::Arch arch = ctx.arch(); + add(std::unique_ptr<Reader>(new mach_o::normalized::MachOObjectReader(ctx))); + add(std::unique_ptr<Reader>(new mach_o::normalized::MachODylibReader(ctx))); + addKindTable(Reference::KindNamespace::mach_o, ctx.archHandler().kindArch(), + ctx.archHandler().kindStrings()); + add(std::unique_ptr<YamlIOTaggedDocumentHandler>( + new mach_o::MachOYamlIOTaggedDocumentHandler(arch))); +} + + +} // namespace lld diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h new file mode 100644 index 00000000000..86823efa33c --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h @@ -0,0 +1,218 @@ +//===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryUtils.h ------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_NORMALIZED_FILE_BINARY_UTILS_H +#define LLD_READER_WRITER_MACHO_NORMALIZED_FILE_BINARY_UTILS_H + +#include "MachONormalizedFile.h" +#include "lld/Core/Error.h" +#include "lld/Core/LLVM.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/MachO.h" +#include <system_error> + +namespace lld { +namespace mach_o { +namespace normalized { + +class ByteBuffer { +public: + ByteBuffer() : _ostream(_bytes) { } + + void append_byte(uint8_t b) { + _ostream << b; + } + void append_uleb128(uint64_t value) { + llvm::encodeULEB128(value, _ostream); + } + void append_uleb128Fixed(uint64_t value, unsigned byteCount) { + unsigned min = llvm::getULEB128Size(value); + assert(min <= byteCount); + unsigned pad = byteCount - min; + llvm::encodeULEB128(value, _ostream, pad); + } + void append_sleb128(int64_t value) { + llvm::encodeSLEB128(value, _ostream); + } + void append_string(StringRef str) { + _ostream << str; + append_byte(0); + } + void align(unsigned alignment) { + while ( (_ostream.tell() % alignment) != 0 ) + append_byte(0); + } + size_t size() { + return _ostream.tell(); + } + const uint8_t *bytes() { + return reinterpret_cast<const uint8_t*>(_ostream.str().data()); + } + +private: + SmallVector<char, 128> _bytes; + // Stream ivar must be after SmallVector ivar to construct properly. + llvm::raw_svector_ostream _ostream; +}; + +using namespace llvm::support::endian; +using llvm::sys::getSwappedBytes; + +template<typename T> +static inline uint16_t read16(const T *loc, bool isBig) { + assert((uint64_t)loc % llvm::alignOf<T>() == 0 && + "invalid pointer alignment"); + return isBig ? 
read16be(loc) : read16le(loc); +} + +template<typename T> +static inline uint32_t read32(const T *loc, bool isBig) { + assert((uint64_t)loc % llvm::alignOf<T>() == 0 && + "invalid pointer alignment"); + return isBig ? read32be(loc) : read32le(loc); +} + +template<typename T> +static inline uint64_t read64(const T *loc, bool isBig) { + assert((uint64_t)loc % llvm::alignOf<T>() == 0 && + "invalid pointer alignment"); + return isBig ? read64be(loc) : read64le(loc); +} + +inline void write16(uint8_t *loc, uint16_t value, bool isBig) { + if (isBig) + write16be(loc, value); + else + write16le(loc, value); +} + +inline void write32(uint8_t *loc, uint32_t value, bool isBig) { + if (isBig) + write32be(loc, value); + else + write32le(loc, value); +} + +inline void write64(uint8_t *loc, uint64_t value, bool isBig) { + if (isBig) + write64be(loc, value); + else + write64le(loc, value); +} + +inline uint32_t +bitFieldExtract(uint32_t value, bool isBigEndianBigField, uint8_t firstBit, + uint8_t bitCount) { + const uint32_t mask = ((1<<bitCount)-1); + const uint8_t shift = isBigEndianBigField ? (32-firstBit-bitCount) : firstBit; + return (value >> shift) & mask; +} + +inline void +bitFieldSet(uint32_t &bits, bool isBigEndianBigField, uint32_t newBits, + uint8_t firstBit, uint8_t bitCount) { + const uint32_t mask = ((1<<bitCount)-1); + assert((newBits & mask) == newBits); + const uint8_t shift = isBigEndianBigField ? 
(32-firstBit-bitCount) : firstBit; + bits &= ~(mask << shift); + bits |= (newBits << shift); +} + +inline Relocation unpackRelocation(const llvm::MachO::any_relocation_info &r, + bool isBigEndian) { + uint32_t r0 = read32(&r.r_word0, isBigEndian); + uint32_t r1 = read32(&r.r_word1, isBigEndian); + + Relocation result; + if (r0 & llvm::MachO::R_SCATTERED) { + // scattered relocation record always laid out like big endian bit field + result.offset = bitFieldExtract(r0, true, 8, 24); + result.scattered = true; + result.type = (RelocationInfoType) + bitFieldExtract(r0, true, 4, 4); + result.length = bitFieldExtract(r0, true, 2, 2); + result.pcRel = bitFieldExtract(r0, true, 1, 1); + result.isExtern = false; + result.value = r1; + result.symbol = 0; + } else { + result.offset = r0; + result.scattered = false; + result.type = (RelocationInfoType) + bitFieldExtract(r1, isBigEndian, 28, 4); + result.length = bitFieldExtract(r1, isBigEndian, 25, 2); + result.pcRel = bitFieldExtract(r1, isBigEndian, 24, 1); + result.isExtern = bitFieldExtract(r1, isBigEndian, 27, 1); + result.value = 0; + result.symbol = bitFieldExtract(r1, isBigEndian, 0, 24); + } + return result; +} + + +inline llvm::MachO::any_relocation_info +packRelocation(const Relocation &r, bool swap, bool isBigEndian) { + uint32_t r0 = 0; + uint32_t r1 = 0; + + if (r.scattered) { + r1 = r.value; + bitFieldSet(r0, true, r.offset, 8, 24); + bitFieldSet(r0, true, r.type, 4, 4); + bitFieldSet(r0, true, r.length, 2, 2); + bitFieldSet(r0, true, r.pcRel, 1, 1); + bitFieldSet(r0, true, r.scattered, 0, 1); // R_SCATTERED + } else { + r0 = r.offset; + bitFieldSet(r1, isBigEndian, r.type, 28, 4); + bitFieldSet(r1, isBigEndian, r.isExtern, 27, 1); + bitFieldSet(r1, isBigEndian, r.length, 25, 2); + bitFieldSet(r1, isBigEndian, r.pcRel, 24, 1); + bitFieldSet(r1, isBigEndian, r.symbol, 0, 24); + } + + llvm::MachO::any_relocation_info result; + result.r_word0 = swap ? getSwappedBytes(r0) : r0; + result.r_word1 = swap ? 
getSwappedBytes(r1) : r1; + return result; +} + +inline StringRef getString16(const char s[16]) { + StringRef x = s; + if ( x.size() > 16 ) + return x.substr(0, 16); + else + return x; +} + +inline void setString16(StringRef str, char s[16]) { + memset(s, 0, 16); + memcpy(s, str.begin(), (str.size() > 16) ? 16: str.size()); +} + +// Implemented in normalizedToAtoms() and used by normalizedFromAtoms() so +// that the same table can be used to map mach-o sections to and from +// DefinedAtom::ContentType. +void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType, + StringRef &segmentName, + StringRef §ionName, + SectionType §ionType, + SectionAttr §ionAttrs, + bool &relocsToDefinedCanBeImplicit); + +} // namespace normalized +} // namespace mach_o +} // namespace lld + +#endif // LLD_READER_WRITER_MACHO_NORMALIZED_FILE_BINARY_UTILS_H diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp new file mode 100644 index 00000000000..f3e159684e1 --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp @@ -0,0 +1,1500 @@ +//===- lib/ReaderWriter/MachO/MachONormalizedFileBinaryWriter.cpp ---------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +/// +/// \file For mach-o object files, this implementation converts normalized +/// mach-o in memory to mach-o binary on disk. 
+/// +/// +---------------+ +/// | binary mach-o | +/// +---------------+ +/// ^ +/// | +/// | +/// +------------+ +/// | normalized | +/// +------------+ + +#include "MachONormalizedFile.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "lld/Core/Error.h" +#include "lld/Core/LLVM.h" +#include "llvm/ADT/ilist.h" +#include "llvm/ADT/ilist_node.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/MachO.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include <functional> +#include <list> +#include <map> +#include <system_error> + +using namespace llvm::MachO; + +namespace lld { +namespace mach_o { +namespace normalized { + +struct TrieNode; // Forward declaration. 
+ +struct TrieEdge : public llvm::ilist_node<TrieEdge> { + TrieEdge(StringRef s, TrieNode *node) : _subString(s), _child(node) {} + + StringRef _subString; + struct TrieNode *_child; +}; + +} // namespace normalized +} // namespace mach_o +} // namespace lld + + +namespace llvm { + using lld::mach_o::normalized::TrieEdge; + template <> + struct ilist_traits<TrieEdge> + : public ilist_default_traits<TrieEdge> { + private: + mutable ilist_half_node<TrieEdge> Sentinel; + public: + TrieEdge *createSentinel() const { + return static_cast<TrieEdge*>(&Sentinel); + } + void destroySentinel(TrieEdge *) const {} + + TrieEdge *provideInitialHead() const { return createSentinel(); } + TrieEdge *ensureHead(TrieEdge*) const { return createSentinel(); } + static void noteHead(TrieEdge*, TrieEdge*) {} + void deleteNode(TrieEdge *N) {} + + private: + void createNode(const TrieEdge &); + }; +} // namespace llvm + + +namespace lld { +namespace mach_o { +namespace normalized { + +struct TrieNode { + typedef llvm::ilist<TrieEdge> TrieEdgeList; + + TrieNode(StringRef s) + : _cummulativeString(s), _address(0), _flags(0), _other(0), + _trieOffset(0), _hasExportInfo(false) {} + ~TrieNode() = default; + + void addSymbol(const Export &entry, BumpPtrAllocator &allocator, + std::vector<TrieNode *> &allNodes); + bool updateOffset(uint32_t &offset); + void appendToByteBuffer(ByteBuffer &out); + +private: + StringRef _cummulativeString; + TrieEdgeList _children; + uint64_t _address; + uint64_t _flags; + uint64_t _other; + StringRef _importedName; + uint32_t _trieOffset; + bool _hasExportInfo; +}; + +/// Utility class for writing a mach-o binary file given an in-memory +/// normalized file. +class MachOFileLayout { +public: + /// All layout computation is done in the constructor. + MachOFileLayout(const NormalizedFile &file); + + /// Returns the final file size as computed in the constructor. + size_t size() const; + + // Returns size of the mach_header and load commands. 
+ size_t headerAndLoadCommandsSize() const; + + /// Writes the normalized file as a binary mach-o file to the specified + /// path. This does not have a stream interface because the generated + /// file may need the 'x' bit set. + llvm::Error writeBinary(StringRef path); + +private: + uint32_t loadCommandsSize(uint32_t &count); + void buildFileOffsets(); + void writeMachHeader(); + llvm::Error writeLoadCommands(); + void writeSectionContent(); + void writeRelocations(); + void writeSymbolTable(); + void writeRebaseInfo(); + void writeBindingInfo(); + void writeLazyBindingInfo(); + void writeExportInfo(); + void writeFunctionStartsInfo(); + void writeDataInCodeInfo(); + void writeLinkEditContent(); + void buildLinkEditInfo(); + void buildRebaseInfo(); + void buildBindInfo(); + void buildLazyBindInfo(); + void buildExportTrie(); + void computeFunctionStartsSize(); + void computeDataInCodeSize(); + void computeSymbolTableSizes(); + void buildSectionRelocations(); + void appendSymbols(const std::vector<Symbol> &symbols, + uint32_t &symOffset, uint32_t &strOffset); + uint32_t indirectSymbolIndex(const Section §, uint32_t &index); + uint32_t indirectSymbolElementSize(const Section §); + + // For use as template parameter to load command methods. + struct MachO64Trait { + typedef llvm::MachO::segment_command_64 command; + typedef llvm::MachO::section_64 section; + enum { LC = llvm::MachO::LC_SEGMENT_64 }; + }; + + // For use as template parameter to load command methods. 
+ struct MachO32Trait { + typedef llvm::MachO::segment_command command; + typedef llvm::MachO::section section; + enum { LC = llvm::MachO::LC_SEGMENT }; + }; + + template <typename T> + llvm::Error writeSingleSegmentLoadCommand(uint8_t *&lc); + template <typename T> llvm::Error writeSegmentLoadCommands(uint8_t *&lc); + + uint32_t pointerAlign(uint32_t value); + static StringRef dyldPath(); + + struct SegExtraInfo { + uint32_t fileOffset; + uint32_t fileSize; + std::vector<const Section*> sections; + }; + typedef std::map<const Segment*, SegExtraInfo> SegMap; + struct SectionExtraInfo { + uint32_t fileOffset; + }; + typedef std::map<const Section*, SectionExtraInfo> SectionMap; + + const NormalizedFile &_file; + std::error_code _ec; + uint8_t *_buffer; + const bool _is64; + const bool _swap; + const bool _bigEndianArch; + uint64_t _seg1addr; + uint32_t _startOfLoadCommands; + uint32_t _countOfLoadCommands; + uint32_t _endOfLoadCommands; + uint32_t _startOfRelocations; + uint32_t _startOfFunctionStarts; + uint32_t _startOfDataInCode; + uint32_t _startOfSymbols; + uint32_t _startOfIndirectSymbols; + uint32_t _startOfSymbolStrings; + uint32_t _endOfSymbolStrings; + uint32_t _symbolTableLocalsStartIndex; + uint32_t _symbolTableGlobalsStartIndex; + uint32_t _symbolTableUndefinesStartIndex; + uint32_t _symbolStringPoolSize; + uint32_t _symbolTableSize; + uint32_t _functionStartsSize; + uint32_t _dataInCodeSize; + uint32_t _indirectSymbolTableCount; + // Used in object file creation only + uint32_t _startOfSectionsContent; + uint32_t _endOfSectionsContent; + // Used in final linked image only + uint32_t _startOfLinkEdit; + uint32_t _startOfRebaseInfo; + uint32_t _endOfRebaseInfo; + uint32_t _startOfBindingInfo; + uint32_t _endOfBindingInfo; + uint32_t _startOfLazyBindingInfo; + uint32_t _endOfLazyBindingInfo; + uint32_t _startOfExportTrie; + uint32_t _endOfExportTrie; + uint32_t _endOfLinkEdit; + uint64_t _addressOfLinkEdit; + SegMap _segInfo; + SectionMap _sectInfo; + 
ByteBuffer _rebaseInfo; + ByteBuffer _bindingInfo; + ByteBuffer _lazyBindingInfo; + ByteBuffer _weakBindingInfo; + ByteBuffer _exportTrie; +}; + +size_t headerAndLoadCommandsSize(const NormalizedFile &file) { + MachOFileLayout layout(file); + return layout.headerAndLoadCommandsSize(); +} + +StringRef MachOFileLayout::dyldPath() { + return "/usr/lib/dyld"; +} + +uint32_t MachOFileLayout::pointerAlign(uint32_t value) { + return llvm::alignTo(value, _is64 ? 8 : 4); +} + + +size_t MachOFileLayout::headerAndLoadCommandsSize() const { + return _endOfLoadCommands; +} + +MachOFileLayout::MachOFileLayout(const NormalizedFile &file) + : _file(file), + _is64(MachOLinkingContext::is64Bit(file.arch)), + _swap(!MachOLinkingContext::isHostEndian(file.arch)), + _bigEndianArch(MachOLinkingContext::isBigEndian(file.arch)), + _seg1addr(INT64_MAX) { + _startOfLoadCommands = _is64 ? sizeof(mach_header_64) : sizeof(mach_header); + const size_t segCommandBaseSize = + (_is64 ? sizeof(segment_command_64) : sizeof(segment_command)); + const size_t sectsSize = (_is64 ? sizeof(section_64) : sizeof(section)); + if (file.fileType == llvm::MachO::MH_OBJECT) { + // object files have just one segment load command containing all sections + _endOfLoadCommands = _startOfLoadCommands + + segCommandBaseSize + + file.sections.size() * sectsSize + + sizeof(symtab_command); + _countOfLoadCommands = 2; + if (file.hasMinVersionLoadCommand) { + _endOfLoadCommands += sizeof(version_min_command); + _countOfLoadCommands++; + } + if (!_file.functionStarts.empty()) { + _endOfLoadCommands += sizeof(linkedit_data_command); + _countOfLoadCommands++; + } + if (_file.generateDataInCodeLoadCommand) { + _endOfLoadCommands += sizeof(linkedit_data_command); + _countOfLoadCommands++; + } + // Assign file offsets to each section. 
+ _startOfSectionsContent = _endOfLoadCommands; + unsigned relocCount = 0; + uint64_t offset = _startOfSectionsContent; + for (const Section § : file.sections) { + if (isZeroFillSection(sect.type)) + _sectInfo[§].fileOffset = 0; + else { + offset = llvm::alignTo(offset, sect.alignment); + _sectInfo[§].fileOffset = offset; + offset += sect.content.size(); + } + relocCount += sect.relocations.size(); + } + _endOfSectionsContent = offset; + + computeSymbolTableSizes(); + computeFunctionStartsSize(); + computeDataInCodeSize(); + + // Align start of relocations. + _startOfRelocations = pointerAlign(_endOfSectionsContent); + _startOfFunctionStarts = _startOfRelocations + relocCount * 8; + _startOfDataInCode = _startOfFunctionStarts + _functionStartsSize; + _startOfSymbols = _startOfDataInCode + _dataInCodeSize; + // Add Indirect symbol table. + _startOfIndirectSymbols = _startOfSymbols + _symbolTableSize; + // Align start of symbol table and symbol strings. + _startOfSymbolStrings = _startOfIndirectSymbols + + pointerAlign(_indirectSymbolTableCount * sizeof(uint32_t)); + _endOfSymbolStrings = _startOfSymbolStrings + + pointerAlign(_symbolStringPoolSize); + _endOfLinkEdit = _endOfSymbolStrings; + DEBUG_WITH_TYPE("MachOFileLayout", + llvm::dbgs() << "MachOFileLayout()\n" + << " startOfLoadCommands=" << _startOfLoadCommands << "\n" + << " countOfLoadCommands=" << _countOfLoadCommands << "\n" + << " endOfLoadCommands=" << _endOfLoadCommands << "\n" + << " startOfRelocations=" << _startOfRelocations << "\n" + << " startOfSymbols=" << _startOfSymbols << "\n" + << " startOfSymbolStrings=" << _startOfSymbolStrings << "\n" + << " endOfSymbolStrings=" << _endOfSymbolStrings << "\n" + << " startOfSectionsContent=" << _startOfSectionsContent << "\n" + << " endOfSectionsContent=" << _endOfSectionsContent << "\n"); + } else { + // Final linked images have one load command per segment. 
+ _endOfLoadCommands = _startOfLoadCommands + + loadCommandsSize(_countOfLoadCommands); + + // Assign section file offsets. + buildFileOffsets(); + buildLinkEditInfo(); + + // LINKEDIT of final linked images has in order: + // rebase info, binding info, lazy binding info, weak binding info, + // data-in-code, symbol table, indirect symbol table, symbol table strings. + _startOfRebaseInfo = _startOfLinkEdit; + _endOfRebaseInfo = _startOfRebaseInfo + _rebaseInfo.size(); + _startOfBindingInfo = _endOfRebaseInfo; + _endOfBindingInfo = _startOfBindingInfo + _bindingInfo.size(); + _startOfLazyBindingInfo = _endOfBindingInfo; + _endOfLazyBindingInfo = _startOfLazyBindingInfo + _lazyBindingInfo.size(); + _startOfExportTrie = _endOfLazyBindingInfo; + _endOfExportTrie = _startOfExportTrie + _exportTrie.size(); + _startOfFunctionStarts = _endOfExportTrie; + _startOfDataInCode = _startOfFunctionStarts + _functionStartsSize; + _startOfSymbols = _startOfDataInCode + _dataInCodeSize; + _startOfIndirectSymbols = _startOfSymbols + _symbolTableSize; + _startOfSymbolStrings = _startOfIndirectSymbols + + pointerAlign(_indirectSymbolTableCount * sizeof(uint32_t)); + _endOfSymbolStrings = _startOfSymbolStrings + + pointerAlign(_symbolStringPoolSize); + _endOfLinkEdit = _endOfSymbolStrings; + DEBUG_WITH_TYPE("MachOFileLayout", + llvm::dbgs() << "MachOFileLayout()\n" + << " startOfLoadCommands=" << _startOfLoadCommands << "\n" + << " countOfLoadCommands=" << _countOfLoadCommands << "\n" + << " endOfLoadCommands=" << _endOfLoadCommands << "\n" + << " startOfLinkEdit=" << _startOfLinkEdit << "\n" + << " startOfRebaseInfo=" << _startOfRebaseInfo << "\n" + << " endOfRebaseInfo=" << _endOfRebaseInfo << "\n" + << " startOfBindingInfo=" << _startOfBindingInfo << "\n" + << " endOfBindingInfo=" << _endOfBindingInfo << "\n" + << " startOfLazyBindingInfo=" << _startOfLazyBindingInfo << "\n" + << " endOfLazyBindingInfo=" << _endOfLazyBindingInfo << "\n" + << " startOfExportTrie=" << 
_startOfExportTrie << "\n" + << " endOfExportTrie=" << _endOfExportTrie << "\n" + << " startOfFunctionStarts=" << _startOfFunctionStarts << "\n" + << " startOfDataInCode=" << _startOfDataInCode << "\n" + << " startOfSymbols=" << _startOfSymbols << "\n" + << " startOfSymbolStrings=" << _startOfSymbolStrings << "\n" + << " endOfSymbolStrings=" << _endOfSymbolStrings << "\n" + << " addressOfLinkEdit=" << _addressOfLinkEdit << "\n"); + } +} + +uint32_t MachOFileLayout::loadCommandsSize(uint32_t &count) { + uint32_t size = 0; + count = 0; + + const size_t segCommandSize = + (_is64 ? sizeof(segment_command_64) : sizeof(segment_command)); + const size_t sectionSize = (_is64 ? sizeof(section_64) : sizeof(section)); + + // Add LC_SEGMENT for each segment. + size += _file.segments.size() * segCommandSize; + count += _file.segments.size(); + // Add section record for each section. + size += _file.sections.size() * sectionSize; + + // If creating a dylib, add LC_ID_DYLIB. + if (_file.fileType == llvm::MachO::MH_DYLIB) { + size += sizeof(dylib_command) + pointerAlign(_file.installName.size() + 1); + ++count; + } + + // Add LC_DYLD_INFO + size += sizeof(dyld_info_command); + ++count; + + // Add LC_SYMTAB + size += sizeof(symtab_command); + ++count; + + // Add LC_DYSYMTAB + if (_file.fileType != llvm::MachO::MH_PRELOAD) { + size += sizeof(dysymtab_command); + ++count; + } + + // If main executable add LC_LOAD_DYLINKER + if (_file.fileType == llvm::MachO::MH_EXECUTE) { + size += pointerAlign(sizeof(dylinker_command) + dyldPath().size()+1); + ++count; + } + + // Add LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, LC_VERSION_MIN_WATCHOS, + // LC_VERSION_MIN_TVOS + if (_file.hasMinVersionLoadCommand) { + size += sizeof(version_min_command); + ++count; + } + + // Add LC_SOURCE_VERSION + size += sizeof(source_version_command); + ++count; + + // If main executable add LC_MAIN + if (_file.fileType == llvm::MachO::MH_EXECUTE) { + size += sizeof(entry_point_command); + ++count; + } + + // 
Add LC_LOAD_DYLIB for each dependent dylib. + for (const DependentDylib &dep : _file.dependentDylibs) { + size += sizeof(dylib_command) + pointerAlign(dep.path.size()+1); + ++count; + } + + // Add LC_RPATH + for (const StringRef &path : _file.rpaths) { + size += pointerAlign(sizeof(rpath_command) + path.size() + 1); + ++count; + } + + // Add LC_FUNCTION_STARTS if needed + if (!_file.functionStarts.empty()) { + size += sizeof(linkedit_data_command); + ++count; + } + + // Add LC_DATA_IN_CODE if requested. Note, we do encode zero length entries. + // FIXME: Zero length entries is only to match ld64. Should we change this? + if (_file.generateDataInCodeLoadCommand) { + size += sizeof(linkedit_data_command); + ++count; + } + + return size; +} + +static bool overlaps(const Segment &s1, const Segment &s2) { + if (s2.address >= s1.address+s1.size) + return false; + if (s1.address >= s2.address+s2.size) + return false; + return true; +} + +static bool overlaps(const Section &s1, const Section &s2) { + if (s2.address >= s1.address+s1.content.size()) + return false; + if (s1.address >= s2.address+s2.content.size()) + return false; + return true; +} + +void MachOFileLayout::buildFileOffsets() { + // Verify no segments overlap + for (const Segment &sg1 : _file.segments) { + for (const Segment &sg2 : _file.segments) { + if (&sg1 == &sg2) + continue; + if (overlaps(sg1,sg2)) { + _ec = make_error_code(llvm::errc::executable_format_error); + return; + } + } + } + + // Verify no sections overlap + for (const Section &s1 : _file.sections) { + for (const Section &s2 : _file.sections) { + if (&s1 == &s2) + continue; + if (overlaps(s1,s2)) { + _ec = make_error_code(llvm::errc::executable_format_error); + return; + } + } + } + + // Build side table of extra info about segments and sections. + SegExtraInfo t; + t.fileOffset = 0; + for (const Segment &sg : _file.segments) { + _segInfo[&sg] = t; + } + SectionExtraInfo t2; + t2.fileOffset = 0; + // Assign sections to segments. 
+ for (const Section &s : _file.sections) { + _sectInfo[&s] = t2; + bool foundSegment = false; + for (const Segment &sg : _file.segments) { + if (sg.name.equals(s.segmentName)) { + if ((s.address >= sg.address) + && (s.address+s.content.size() <= sg.address+sg.size)) { + _segInfo[&sg].sections.push_back(&s); + foundSegment = true; + break; + } + } + } + if (!foundSegment) { + _ec = make_error_code(llvm::errc::executable_format_error); + return; + } + } + + // Assign file offsets. + uint32_t fileOffset = 0; + DEBUG_WITH_TYPE("MachOFileLayout", + llvm::dbgs() << "buildFileOffsets()\n"); + for (const Segment &sg : _file.segments) { + _segInfo[&sg].fileOffset = fileOffset; + if ((_seg1addr == INT64_MAX) && sg.init_access) + _seg1addr = sg.address; + DEBUG_WITH_TYPE("MachOFileLayout", + llvm::dbgs() << " segment=" << sg.name + << ", fileOffset=" << _segInfo[&sg].fileOffset << "\n"); + + uint32_t segFileSize = 0; + // A segment that is not zero-fill must use a least one page of disk space. + if (sg.init_access) + segFileSize = _file.pageSize; + for (const Section *s : _segInfo[&sg].sections) { + uint32_t sectOffset = s->address - sg.address; + uint32_t sectFileSize = + isZeroFillSection(s->type) ? 
0 : s->content.size(); + segFileSize = std::max(segFileSize, sectOffset + sectFileSize); + + _sectInfo[s].fileOffset = _segInfo[&sg].fileOffset + sectOffset; + DEBUG_WITH_TYPE("MachOFileLayout", + llvm::dbgs() << " section=" << s->sectionName + << ", fileOffset=" << fileOffset << "\n"); + } + + // round up all segments to page aligned, except __LINKEDIT + if (!sg.name.equals("__LINKEDIT")) { + _segInfo[&sg].fileSize = llvm::alignTo(segFileSize, _file.pageSize); + fileOffset = llvm::alignTo(fileOffset + segFileSize, _file.pageSize); + } + _addressOfLinkEdit = sg.address + sg.size; + } + _startOfLinkEdit = fileOffset; +} + +size_t MachOFileLayout::size() const { + return _endOfSymbolStrings; +} + +void MachOFileLayout::writeMachHeader() { + auto cpusubtype = MachOLinkingContext::cpuSubtypeFromArch(_file.arch); + // dynamic x86 executables on newer OS version should also set the + // CPU_SUBTYPE_LIB64 mask in the CPU subtype. + // FIXME: Check that this is a dynamic executable, not a static one. + if (_file.fileType == llvm::MachO::MH_EXECUTE && + cpusubtype == CPU_SUBTYPE_X86_64_ALL && + _file.os == MachOLinkingContext::OS::macOSX) { + uint32_t version; + bool failed = MachOLinkingContext::parsePackedVersion("10.5", version); + if (!failed && _file.minOSverson >= version) + cpusubtype |= CPU_SUBTYPE_LIB64; + } + + mach_header *mh = reinterpret_cast<mach_header*>(_buffer); + mh->magic = _is64 ? 
llvm::MachO::MH_MAGIC_64 : llvm::MachO::MH_MAGIC; + mh->cputype = MachOLinkingContext::cpuTypeFromArch(_file.arch); + mh->cpusubtype = cpusubtype; + mh->filetype = _file.fileType; + mh->ncmds = _countOfLoadCommands; + mh->sizeofcmds = _endOfLoadCommands - _startOfLoadCommands; + mh->flags = _file.flags; + if (_swap) + swapStruct(*mh); +} + +uint32_t MachOFileLayout::indirectSymbolIndex(const Section §, + uint32_t &index) { + if (sect.indirectSymbols.empty()) + return 0; + uint32_t result = index; + index += sect.indirectSymbols.size(); + return result; +} + +uint32_t MachOFileLayout::indirectSymbolElementSize(const Section §) { + if (sect.indirectSymbols.empty()) + return 0; + if (sect.type != S_SYMBOL_STUBS) + return 0; + return sect.content.size() / sect.indirectSymbols.size(); +} + +template <typename T> +llvm::Error MachOFileLayout::writeSingleSegmentLoadCommand(uint8_t *&lc) { + typename T::command* seg = reinterpret_cast<typename T::command*>(lc); + seg->cmd = T::LC; + seg->cmdsize = sizeof(typename T::command) + + _file.sections.size() * sizeof(typename T::section); + uint8_t *next = lc + seg->cmdsize; + memset(seg->segname, 0, 16); + seg->vmaddr = 0; + seg->vmsize = _file.sections.back().address + + _file.sections.back().content.size(); + seg->fileoff = _endOfLoadCommands; + seg->filesize = seg->vmsize; + seg->maxprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE; + seg->initprot = VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE; + seg->nsects = _file.sections.size(); + seg->flags = 0; + if (_swap) + swapStruct(*seg); + typename T::section *sout = reinterpret_cast<typename T::section*> + (lc+sizeof(typename T::command)); + uint32_t relOffset = _startOfRelocations; + uint32_t indirectSymRunningIndex = 0; + for (const Section &sin : _file.sections) { + setString16(sin.sectionName, sout->sectname); + setString16(sin.segmentName, sout->segname); + sout->addr = sin.address; + sout->size = sin.content.size(); + sout->offset = _sectInfo[&sin].fileOffset; + sout->align = 
llvm::Log2_32(sin.alignment); + sout->reloff = sin.relocations.empty() ? 0 : relOffset; + sout->nreloc = sin.relocations.size(); + sout->flags = sin.type | sin.attributes; + sout->reserved1 = indirectSymbolIndex(sin, indirectSymRunningIndex); + sout->reserved2 = indirectSymbolElementSize(sin); + relOffset += sin.relocations.size() * sizeof(any_relocation_info); + if (_swap) + swapStruct(*sout); + ++sout; + } + lc = next; + return llvm::Error(); +} + +template <typename T> +llvm::Error MachOFileLayout::writeSegmentLoadCommands(uint8_t *&lc) { + uint32_t indirectSymRunningIndex = 0; + for (const Segment &seg : _file.segments) { + // Link edit has no sections and a custom range of address, so handle it + // specially. + SegExtraInfo &segInfo = _segInfo[&seg]; + if (seg.name.equals("__LINKEDIT")) { + size_t linkeditSize = _endOfLinkEdit - _startOfLinkEdit; + typename T::command* cmd = reinterpret_cast<typename T::command*>(lc); + cmd->cmd = T::LC; + cmd->cmdsize = sizeof(typename T::command); + uint8_t *next = lc + cmd->cmdsize; + setString16("__LINKEDIT", cmd->segname); + cmd->vmaddr = _addressOfLinkEdit; + cmd->vmsize = llvm::alignTo(linkeditSize, _file.pageSize); + cmd->fileoff = _startOfLinkEdit; + cmd->filesize = linkeditSize; + cmd->initprot = seg.init_access; + cmd->maxprot = seg.max_access; + cmd->nsects = 0; + cmd->flags = 0; + if (_swap) + swapStruct(*cmd); + lc = next; + continue; + } + // Write segment command with trailing sections. 
+ typename T::command* cmd = reinterpret_cast<typename T::command*>(lc); + cmd->cmd = T::LC; + cmd->cmdsize = sizeof(typename T::command) + + segInfo.sections.size() * sizeof(typename T::section); + uint8_t *next = lc + cmd->cmdsize; + setString16(seg.name, cmd->segname); + cmd->vmaddr = seg.address; + cmd->vmsize = seg.size; + cmd->fileoff = segInfo.fileOffset; + cmd->filesize = segInfo.fileSize; + cmd->initprot = seg.init_access; + cmd->maxprot = seg.max_access; + cmd->nsects = segInfo.sections.size(); + cmd->flags = 0; + if (_swap) + swapStruct(*cmd); + typename T::section *sect = reinterpret_cast<typename T::section*> + (lc+sizeof(typename T::command)); + for (const Section *section : segInfo.sections) { + setString16(section->sectionName, sect->sectname); + setString16(section->segmentName, sect->segname); + sect->addr = section->address; + sect->size = section->content.size(); + if (isZeroFillSection(section->type)) + sect->offset = 0; + else + sect->offset = section->address - seg.address + segInfo.fileOffset; + sect->align = llvm::Log2_32(section->alignment); + sect->reloff = 0; + sect->nreloc = 0; + sect->flags = section->type | section->attributes; + sect->reserved1 = indirectSymbolIndex(*section, indirectSymRunningIndex); + sect->reserved2 = indirectSymbolElementSize(*section); + if (_swap) + swapStruct(*sect); + ++sect; + } + lc = reinterpret_cast<uint8_t*>(next); + } + return llvm::Error(); +} + +static void writeVersionMinLoadCommand(const NormalizedFile &_file, + bool _swap, + uint8_t *&lc) { + if (!_file.hasMinVersionLoadCommand) + return; + version_min_command *vm = reinterpret_cast<version_min_command*>(lc); + switch (_file.os) { + case MachOLinkingContext::OS::unknown: + vm->cmd = _file.minOSVersionKind; + vm->cmdsize = sizeof(version_min_command); + vm->version = _file.minOSverson; + vm->sdk = 0; + break; + case MachOLinkingContext::OS::macOSX: + vm->cmd = LC_VERSION_MIN_MACOSX; + vm->cmdsize = sizeof(version_min_command); + vm->version = 
_file.minOSverson; + vm->sdk = _file.sdkVersion; + break; + case MachOLinkingContext::OS::iOS: + case MachOLinkingContext::OS::iOS_simulator: + vm->cmd = LC_VERSION_MIN_IPHONEOS; + vm->cmdsize = sizeof(version_min_command); + vm->version = _file.minOSverson; + vm->sdk = _file.sdkVersion; + break; + } + if (_swap) + swapStruct(*vm); + lc += sizeof(version_min_command); +} + +llvm::Error MachOFileLayout::writeLoadCommands() { + uint8_t *lc = &_buffer[_startOfLoadCommands]; + if (_file.fileType == llvm::MachO::MH_OBJECT) { + // Object files have one unnamed segment which holds all sections. + if (_is64) { + if (auto ec = writeSingleSegmentLoadCommand<MachO64Trait>(lc)) + return ec; + } else { + if (auto ec = writeSingleSegmentLoadCommand<MachO32Trait>(lc)) + return ec; + } + // Add LC_SYMTAB with symbol table info + symtab_command* st = reinterpret_cast<symtab_command*>(lc); + st->cmd = LC_SYMTAB; + st->cmdsize = sizeof(symtab_command); + st->symoff = _startOfSymbols; + st->nsyms = _file.localSymbols.size() + _file.globalSymbols.size() + + _file.undefinedSymbols.size(); + st->stroff = _startOfSymbolStrings; + st->strsize = _endOfSymbolStrings - _startOfSymbolStrings; + if (_swap) + swapStruct(*st); + lc += sizeof(symtab_command); + + // Add LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, + // LC_VERSION_MIN_WATCHOS, LC_VERSION_MIN_TVOS + writeVersionMinLoadCommand(_file, _swap, lc); + + // Add LC_FUNCTION_STARTS if needed. + if (_functionStartsSize != 0) { + linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc); + dl->cmd = LC_FUNCTION_STARTS; + dl->cmdsize = sizeof(linkedit_data_command); + dl->dataoff = _startOfFunctionStarts; + dl->datasize = _functionStartsSize; + if (_swap) + swapStruct(*dl); + lc += sizeof(linkedit_data_command); + } + + // Add LC_DATA_IN_CODE if requested. 
+ if (_file.generateDataInCodeLoadCommand) { + linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc); + dl->cmd = LC_DATA_IN_CODE; + dl->cmdsize = sizeof(linkedit_data_command); + dl->dataoff = _startOfDataInCode; + dl->datasize = _dataInCodeSize; + if (_swap) + swapStruct(*dl); + lc += sizeof(linkedit_data_command); + } + } else { + // Final linked images have sections under segments. + if (_is64) { + if (auto ec = writeSegmentLoadCommands<MachO64Trait>(lc)) + return ec; + } else { + if (auto ec = writeSegmentLoadCommands<MachO32Trait>(lc)) + return ec; + } + + // Add LC_ID_DYLIB command for dynamic libraries. + if (_file.fileType == llvm::MachO::MH_DYLIB) { + dylib_command *dc = reinterpret_cast<dylib_command*>(lc); + StringRef path = _file.installName; + uint32_t size = sizeof(dylib_command) + pointerAlign(path.size() + 1); + dc->cmd = LC_ID_DYLIB; + dc->cmdsize = size; + dc->dylib.name = sizeof(dylib_command); // offset + // needs to be some constant value different than the one in LC_LOAD_DYLIB + dc->dylib.timestamp = 1; + dc->dylib.current_version = _file.currentVersion; + dc->dylib.compatibility_version = _file.compatVersion; + if (_swap) + swapStruct(*dc); + memcpy(lc + sizeof(dylib_command), path.begin(), path.size()); + lc[sizeof(dylib_command) + path.size()] = '\0'; + lc += size; + } + + // Add LC_DYLD_INFO_ONLY. + dyld_info_command* di = reinterpret_cast<dyld_info_command*>(lc); + di->cmd = LC_DYLD_INFO_ONLY; + di->cmdsize = sizeof(dyld_info_command); + di->rebase_off = _rebaseInfo.size() ? _startOfRebaseInfo : 0; + di->rebase_size = _rebaseInfo.size(); + di->bind_off = _bindingInfo.size() ? _startOfBindingInfo : 0; + di->bind_size = _bindingInfo.size(); + di->weak_bind_off = 0; + di->weak_bind_size = 0; + di->lazy_bind_off = _lazyBindingInfo.size() ? _startOfLazyBindingInfo : 0; + di->lazy_bind_size = _lazyBindingInfo.size(); + di->export_off = _exportTrie.size() ? 
_startOfExportTrie : 0; + di->export_size = _exportTrie.size(); + if (_swap) + swapStruct(*di); + lc += sizeof(dyld_info_command); + + // Add LC_SYMTAB with symbol table info. + symtab_command* st = reinterpret_cast<symtab_command*>(lc); + st->cmd = LC_SYMTAB; + st->cmdsize = sizeof(symtab_command); + st->symoff = _startOfSymbols; + st->nsyms = _file.localSymbols.size() + _file.globalSymbols.size() + + _file.undefinedSymbols.size(); + st->stroff = _startOfSymbolStrings; + st->strsize = _endOfSymbolStrings - _startOfSymbolStrings; + if (_swap) + swapStruct(*st); + lc += sizeof(symtab_command); + + // Add LC_DYSYMTAB + if (_file.fileType != llvm::MachO::MH_PRELOAD) { + dysymtab_command* dst = reinterpret_cast<dysymtab_command*>(lc); + dst->cmd = LC_DYSYMTAB; + dst->cmdsize = sizeof(dysymtab_command); + dst->ilocalsym = _symbolTableLocalsStartIndex; + dst->nlocalsym = _file.localSymbols.size(); + dst->iextdefsym = _symbolTableGlobalsStartIndex; + dst->nextdefsym = _file.globalSymbols.size(); + dst->iundefsym = _symbolTableUndefinesStartIndex; + dst->nundefsym = _file.undefinedSymbols.size(); + dst->tocoff = 0; + dst->ntoc = 0; + dst->modtaboff = 0; + dst->nmodtab = 0; + dst->extrefsymoff = 0; + dst->nextrefsyms = 0; + dst->indirectsymoff = _startOfIndirectSymbols; + dst->nindirectsyms = _indirectSymbolTableCount; + dst->extreloff = 0; + dst->nextrel = 0; + dst->locreloff = 0; + dst->nlocrel = 0; + if (_swap) + swapStruct(*dst); + lc += sizeof(dysymtab_command); + } + + // If main executable, add LC_LOAD_DYLINKER + if (_file.fileType == llvm::MachO::MH_EXECUTE) { + // Build LC_LOAD_DYLINKER load command. 
+ uint32_t size=pointerAlign(sizeof(dylinker_command)+dyldPath().size()+1); + dylinker_command* dl = reinterpret_cast<dylinker_command*>(lc); + dl->cmd = LC_LOAD_DYLINKER; + dl->cmdsize = size; + dl->name = sizeof(dylinker_command); // offset + if (_swap) + swapStruct(*dl); + memcpy(lc+sizeof(dylinker_command), dyldPath().data(), dyldPath().size()); + lc[sizeof(dylinker_command)+dyldPath().size()] = '\0'; + lc += size; + } + + // Add LC_VERSION_MIN_MACOSX, LC_VERSION_MIN_IPHONEOS, LC_VERSION_MIN_WATCHOS, + // LC_VERSION_MIN_TVOS + writeVersionMinLoadCommand(_file, _swap, lc); + + // Add LC_SOURCE_VERSION + { + // Note, using a temporary here to appease UB as we may not be aligned + // enough for a struct containing a uint64_t when emitting a 32-bit binary + source_version_command sv; + sv.cmd = LC_SOURCE_VERSION; + sv.cmdsize = sizeof(source_version_command); + sv.version = _file.sourceVersion; + if (_swap) + swapStruct(sv); + memcpy(lc, &sv, sizeof(source_version_command)); + lc += sizeof(source_version_command); + } + + // If main executable, add LC_MAIN. + if (_file.fileType == llvm::MachO::MH_EXECUTE) { + // Build LC_MAIN load command. 
+ // Note, using a temporary here to appease UB as we may not be aligned + // enough for a struct containing a uint64_t when emitting a 32-bit binary + entry_point_command ep; + ep.cmd = LC_MAIN; + ep.cmdsize = sizeof(entry_point_command); + ep.entryoff = _file.entryAddress - _seg1addr; + ep.stacksize = _file.stackSize; + if (_swap) + swapStruct(ep); + memcpy(lc, &ep, sizeof(entry_point_command)); + lc += sizeof(entry_point_command); + } + + // Add LC_LOAD_DYLIB commands + for (const DependentDylib &dep : _file.dependentDylibs) { + dylib_command* dc = reinterpret_cast<dylib_command*>(lc); + uint32_t size = sizeof(dylib_command) + pointerAlign(dep.path.size()+1); + dc->cmd = dep.kind; + dc->cmdsize = size; + dc->dylib.name = sizeof(dylib_command); // offset + // needs to be some constant value different than the one in LC_ID_DYLIB + dc->dylib.timestamp = 2; + dc->dylib.current_version = dep.currentVersion; + dc->dylib.compatibility_version = dep.compatVersion; + if (_swap) + swapStruct(*dc); + memcpy(lc+sizeof(dylib_command), dep.path.begin(), dep.path.size()); + lc[sizeof(dylib_command)+dep.path.size()] = '\0'; + lc += size; + } + + // Add LC_RPATH + for (const StringRef &path : _file.rpaths) { + rpath_command *rpc = reinterpret_cast<rpath_command *>(lc); + uint32_t size = pointerAlign(sizeof(rpath_command) + path.size() + 1); + rpc->cmd = LC_RPATH; + rpc->cmdsize = size; + rpc->path = sizeof(rpath_command); // offset + if (_swap) + swapStruct(*rpc); + memcpy(lc+sizeof(rpath_command), path.begin(), path.size()); + lc[sizeof(rpath_command)+path.size()] = '\0'; + lc += size; + } + + // Add LC_FUNCTION_STARTS if needed. 
+ if (_functionStartsSize != 0) { + linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc); + dl->cmd = LC_FUNCTION_STARTS; + dl->cmdsize = sizeof(linkedit_data_command); + dl->dataoff = _startOfFunctionStarts; + dl->datasize = _functionStartsSize; + if (_swap) + swapStruct(*dl); + lc += sizeof(linkedit_data_command); + } + + // Add LC_DATA_IN_CODE if requested. + if (_file.generateDataInCodeLoadCommand) { + linkedit_data_command* dl = reinterpret_cast<linkedit_data_command*>(lc); + dl->cmd = LC_DATA_IN_CODE; + dl->cmdsize = sizeof(linkedit_data_command); + dl->dataoff = _startOfDataInCode; + dl->datasize = _dataInCodeSize; + if (_swap) + swapStruct(*dl); + lc += sizeof(linkedit_data_command); + } + } + return llvm::Error(); +} + +void MachOFileLayout::writeSectionContent() { + for (const Section &s : _file.sections) { + // Copy all section content to output buffer. + if (isZeroFillSection(s.type)) + continue; + if (s.content.empty()) + continue; + uint32_t offset = _sectInfo[&s].fileOffset; + uint8_t *p = &_buffer[offset]; + memcpy(p, &s.content[0], s.content.size()); + p += s.content.size(); + } +} + +void MachOFileLayout::writeRelocations() { + uint32_t relOffset = _startOfRelocations; + for (Section sect : _file.sections) { + for (Relocation r : sect.relocations) { + any_relocation_info* rb = reinterpret_cast<any_relocation_info*>( + &_buffer[relOffset]); + *rb = packRelocation(r, _swap, _bigEndianArch); + relOffset += sizeof(any_relocation_info); + } + } +} + +void MachOFileLayout::appendSymbols(const std::vector<Symbol> &symbols, + uint32_t &symOffset, uint32_t &strOffset) { + for (const Symbol &sym : symbols) { + if (_is64) { + nlist_64* nb = reinterpret_cast<nlist_64*>(&_buffer[symOffset]); + nb->n_strx = strOffset - _startOfSymbolStrings; + nb->n_type = sym.type | sym.scope; + nb->n_sect = sym.sect; + nb->n_desc = sym.desc; + nb->n_value = sym.value; + if (_swap) + swapStruct(*nb); + symOffset += sizeof(nlist_64); + } else { + nlist* nb = 
reinterpret_cast<nlist*>(&_buffer[symOffset]); + nb->n_strx = strOffset - _startOfSymbolStrings; + nb->n_type = sym.type | sym.scope; + nb->n_sect = sym.sect; + nb->n_desc = sym.desc; + nb->n_value = sym.value; + if (_swap) + swapStruct(*nb); + symOffset += sizeof(nlist); + } + memcpy(&_buffer[strOffset], sym.name.begin(), sym.name.size()); + strOffset += sym.name.size(); + _buffer[strOffset++] ='\0'; // Strings in table have nul terminator. + } +} + +void MachOFileLayout::writeFunctionStartsInfo() { + if (!_functionStartsSize) + return; + memcpy(&_buffer[_startOfFunctionStarts], _file.functionStarts.data(), + _functionStartsSize); +} + +void MachOFileLayout::writeDataInCodeInfo() { + uint32_t offset = _startOfDataInCode; + for (const DataInCode &entry : _file.dataInCode) { + data_in_code_entry *dst = reinterpret_cast<data_in_code_entry*>( + &_buffer[offset]); + dst->offset = entry.offset; + dst->length = entry.length; + dst->kind = entry.kind; + if (_swap) + swapStruct(*dst); + offset += sizeof(data_in_code_entry); + } +} + +void MachOFileLayout::writeSymbolTable() { + // Write symbol table and symbol strings in parallel. + uint32_t symOffset = _startOfSymbols; + uint32_t strOffset = _startOfSymbolStrings; + _buffer[strOffset++] = '\0'; // Reserve n_strx offset of zero to mean no name. + appendSymbols(_file.localSymbols, symOffset, strOffset); + appendSymbols(_file.globalSymbols, symOffset, strOffset); + appendSymbols(_file.undefinedSymbols, symOffset, strOffset); + // Write indirect symbol table array. + uint32_t *indirects = reinterpret_cast<uint32_t*> + (&_buffer[_startOfIndirectSymbols]); + if (_file.fileType == llvm::MachO::MH_OBJECT) { + // Object files have sections in same order as input normalized file. 
+ for (const Section §ion : _file.sections) { + for (uint32_t index : section.indirectSymbols) { + if (_swap) + *indirects++ = llvm::sys::getSwappedBytes(index); + else + *indirects++ = index; + } + } + } else { + // Final linked images must sort sections from normalized file. + for (const Segment &seg : _file.segments) { + SegExtraInfo &segInfo = _segInfo[&seg]; + for (const Section *section : segInfo.sections) { + for (uint32_t index : section->indirectSymbols) { + if (_swap) + *indirects++ = llvm::sys::getSwappedBytes(index); + else + *indirects++ = index; + } + } + } + } +} + +void MachOFileLayout::writeRebaseInfo() { + memcpy(&_buffer[_startOfRebaseInfo], _rebaseInfo.bytes(), _rebaseInfo.size()); +} + +void MachOFileLayout::writeBindingInfo() { + memcpy(&_buffer[_startOfBindingInfo], + _bindingInfo.bytes(), _bindingInfo.size()); +} + +void MachOFileLayout::writeLazyBindingInfo() { + memcpy(&_buffer[_startOfLazyBindingInfo], + _lazyBindingInfo.bytes(), _lazyBindingInfo.size()); +} + +void MachOFileLayout::writeExportInfo() { + memcpy(&_buffer[_startOfExportTrie], _exportTrie.bytes(), _exportTrie.size()); +} + +void MachOFileLayout::buildLinkEditInfo() { + buildRebaseInfo(); + buildBindInfo(); + buildLazyBindInfo(); + buildExportTrie(); + computeSymbolTableSizes(); + computeFunctionStartsSize(); + computeDataInCodeSize(); +} + +void MachOFileLayout::buildSectionRelocations() { + +} + +void MachOFileLayout::buildRebaseInfo() { + // TODO: compress rebasing info. + for (const RebaseLocation& entry : _file.rebasingInfo) { + _rebaseInfo.append_byte(REBASE_OPCODE_SET_TYPE_IMM | entry.kind); + _rebaseInfo.append_byte(REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB + | entry.segIndex); + _rebaseInfo.append_uleb128(entry.segOffset); + _rebaseInfo.append_uleb128(REBASE_OPCODE_DO_REBASE_IMM_TIMES | 1); + } + _rebaseInfo.append_byte(REBASE_OPCODE_DONE); + _rebaseInfo.align(_is64 ? 8 : 4); +} + +void MachOFileLayout::buildBindInfo() { + // TODO: compress bind info. 
+ uint64_t lastAddend = 0; + for (const BindLocation& entry : _file.bindingInfo) { + _bindingInfo.append_byte(BIND_OPCODE_SET_TYPE_IMM | entry.kind); + _bindingInfo.append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB + | entry.segIndex); + _bindingInfo.append_uleb128(entry.segOffset); + if (entry.ordinal > 0) + _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | + (entry.ordinal & 0xF)); + else + _bindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | + (entry.ordinal & 0xF)); + _bindingInfo.append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); + _bindingInfo.append_string(entry.symbolName); + if (entry.addend != lastAddend) { + _bindingInfo.append_byte(BIND_OPCODE_SET_ADDEND_SLEB); + _bindingInfo.append_sleb128(entry.addend); + lastAddend = entry.addend; + } + _bindingInfo.append_byte(BIND_OPCODE_DO_BIND); + } + _bindingInfo.append_byte(BIND_OPCODE_DONE); + _bindingInfo.align(_is64 ? 8 : 4); +} + +void MachOFileLayout::buildLazyBindInfo() { + for (const BindLocation& entry : _file.lazyBindingInfo) { + _lazyBindingInfo.append_byte(BIND_OPCODE_SET_TYPE_IMM | entry.kind); + _lazyBindingInfo.append_byte(BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB + | entry.segIndex); + _lazyBindingInfo.append_uleb128Fixed(entry.segOffset, 5); + if (entry.ordinal > 0) + _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | + (entry.ordinal & 0xF)); + else + _lazyBindingInfo.append_byte(BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | + (entry.ordinal & 0xF)); + _lazyBindingInfo.append_byte(BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); + _lazyBindingInfo.append_string(entry.symbolName); + _lazyBindingInfo.append_byte(BIND_OPCODE_DO_BIND); + _lazyBindingInfo.append_byte(BIND_OPCODE_DONE); + } + _lazyBindingInfo.append_byte(BIND_OPCODE_DONE); + _lazyBindingInfo.align(_is64 ? 
8 : 4); +} + +void TrieNode::addSymbol(const Export& entry, + BumpPtrAllocator &allocator, + std::vector<TrieNode*> &allNodes) { + StringRef partialStr = entry.name.drop_front(_cummulativeString.size()); + for (TrieEdge &edge : _children) { + StringRef edgeStr = edge._subString; + if (partialStr.startswith(edgeStr)) { + // Already have matching edge, go down that path. + edge._child->addSymbol(entry, allocator, allNodes); + return; + } + // See if string has commmon prefix with existing edge. + for (int n=edgeStr.size()-1; n > 0; --n) { + if (partialStr.substr(0, n).equals(edgeStr.substr(0, n))) { + // Splice in new node: was A -> C, now A -> B -> C + StringRef bNodeStr = edge._child->_cummulativeString; + bNodeStr = bNodeStr.drop_back(edgeStr.size()-n).copy(allocator); + auto *bNode = new (allocator) TrieNode(bNodeStr); + allNodes.push_back(bNode); + TrieNode* cNode = edge._child; + StringRef abEdgeStr = edgeStr.substr(0,n).copy(allocator); + StringRef bcEdgeStr = edgeStr.substr(n).copy(allocator); + DEBUG_WITH_TYPE("trie-builder", llvm::dbgs() + << "splice in TrieNode('" << bNodeStr + << "') between edge '" + << abEdgeStr << "' and edge='" + << bcEdgeStr<< "'\n"); + TrieEdge& abEdge = edge; + abEdge._subString = abEdgeStr; + abEdge._child = bNode; + auto *bcEdge = new (allocator) TrieEdge(bcEdgeStr, cNode); + bNode->_children.insert(bNode->_children.end(), bcEdge); + bNode->addSymbol(entry, allocator, allNodes); + return; + } + } + } + if (entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) { + assert(entry.otherOffset != 0); + } + if (entry.flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) { + assert(entry.otherOffset != 0); + } + // No commonality with any existing child, make a new edge. 
+ auto *newNode = new (allocator) TrieNode(entry.name.copy(allocator)); + auto *newEdge = new (allocator) TrieEdge(partialStr, newNode); + _children.insert(_children.end(), newEdge); + DEBUG_WITH_TYPE("trie-builder", llvm::dbgs() + << "new TrieNode('" << entry.name << "') with edge '" + << partialStr << "' from node='" + << _cummulativeString << "'\n"); + newNode->_address = entry.offset; + newNode->_flags = entry.flags | entry.kind; + newNode->_other = entry.otherOffset; + if ((entry.flags & EXPORT_SYMBOL_FLAGS_REEXPORT) && !entry.otherName.empty()) + newNode->_importedName = entry.otherName.copy(allocator); + newNode->_hasExportInfo = true; + allNodes.push_back(newNode); +} + +bool TrieNode::updateOffset(uint32_t& offset) { + uint32_t nodeSize = 1; // Length when no export info + if (_hasExportInfo) { + if (_flags & EXPORT_SYMBOL_FLAGS_REEXPORT) { + nodeSize = llvm::getULEB128Size(_flags); + nodeSize += llvm::getULEB128Size(_other); // Other contains ordinal. + nodeSize += _importedName.size(); + ++nodeSize; // Trailing zero in imported name. + } else { + nodeSize = llvm::getULEB128Size(_flags) + llvm::getULEB128Size(_address); + if (_flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER) + nodeSize += llvm::getULEB128Size(_other); + } + // Overall node size so far is uleb128 of export info + actual export info. + nodeSize += llvm::getULEB128Size(nodeSize); + } + // Compute size of all child edges. + ++nodeSize; // Byte for number of chidren. + for (TrieEdge &edge : _children) { + nodeSize += edge._subString.size() + 1 // String length. + + llvm::getULEB128Size(edge._child->_trieOffset); // Offset len. + } + // On input, 'offset' is new prefered location for this node. + bool result = (_trieOffset != offset); + // Store new location in node object for use by parents. + _trieOffset = offset; + // Update offset for next iteration. + offset += nodeSize; + // Return true if _trieOffset was changed. 
+ return result; +} + +void TrieNode::appendToByteBuffer(ByteBuffer &out) { + if (_hasExportInfo) { + if (_flags & EXPORT_SYMBOL_FLAGS_REEXPORT) { + if (!_importedName.empty()) { + // nodes with re-export info: size, flags, ordinal, import-name + uint32_t nodeSize = llvm::getULEB128Size(_flags) + + llvm::getULEB128Size(_other) + + _importedName.size() + 1; + assert(nodeSize < 256); + out.append_byte(nodeSize); + out.append_uleb128(_flags); + out.append_uleb128(_other); + out.append_string(_importedName); + } else { + // nodes without re-export info: size, flags, ordinal, empty-string + uint32_t nodeSize = llvm::getULEB128Size(_flags) + + llvm::getULEB128Size(_other) + 1; + assert(nodeSize < 256); + out.append_byte(nodeSize); + out.append_uleb128(_flags); + out.append_uleb128(_other); + out.append_byte(0); + } + } else if ( _flags & EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER ) { + // Nodes with export info: size, flags, address, other + uint32_t nodeSize = llvm::getULEB128Size(_flags) + + llvm::getULEB128Size(_address) + + llvm::getULEB128Size(_other); + assert(nodeSize < 256); + out.append_byte(nodeSize); + out.append_uleb128(_flags); + out.append_uleb128(_address); + out.append_uleb128(_other); + } else { + // Nodes with export info: size, flags, address + uint32_t nodeSize = llvm::getULEB128Size(_flags) + + llvm::getULEB128Size(_address); + assert(nodeSize < 256); + out.append_byte(nodeSize); + out.append_uleb128(_flags); + out.append_uleb128(_address); + } + } else { + // Node with no export info. + uint32_t nodeSize = 0; + out.append_byte(nodeSize); + } + // Add number of children. + assert(_children.size() < 256); + out.append_byte(_children.size()); + // Append each child edge substring and node offset. 
+ for (TrieEdge &edge : _children) { + out.append_string(edge._subString); + out.append_uleb128(edge._child->_trieOffset); + } +} + +void MachOFileLayout::buildExportTrie() { + if (_file.exportInfo.empty()) + return; + + // For all temporary strings and objects used building trie. + BumpPtrAllocator allocator; + + // Build trie of all exported symbols. + auto *rootNode = new (allocator) TrieNode(StringRef()); + std::vector<TrieNode*> allNodes; + allNodes.reserve(_file.exportInfo.size()*2); + allNodes.push_back(rootNode); + for (const Export& entry : _file.exportInfo) { + rootNode->addSymbol(entry, allocator, allNodes); + } + + // Assign each node in the vector an offset in the trie stream, iterating + // until all uleb128 sizes have stabilized. + bool more; + do { + uint32_t offset = 0; + more = false; + for (TrieNode* node : allNodes) { + if (node->updateOffset(offset)) + more = true; + } + } while (more); + + // Serialize trie to ByteBuffer. + for (TrieNode* node : allNodes) { + node->appendToByteBuffer(_exportTrie); + } + _exportTrie.align(_is64 ? 8 : 4); +} + +void MachOFileLayout::computeSymbolTableSizes() { + // MachO symbol tables have three ranges: locals, globals, and undefines + const size_t nlistSize = (_is64 ? sizeof(nlist_64) : sizeof(nlist)); + _symbolTableSize = nlistSize * (_file.localSymbols.size() + + _file.globalSymbols.size() + + _file.undefinedSymbols.size()); + _symbolStringPoolSize = 1; // Always reserve 1-byte for the empty string. 
+ for (const Symbol &sym : _file.localSymbols) { + _symbolStringPoolSize += (sym.name.size()+1); + } + for (const Symbol &sym : _file.globalSymbols) { + _symbolStringPoolSize += (sym.name.size()+1); + } + for (const Symbol &sym : _file.undefinedSymbols) { + _symbolStringPoolSize += (sym.name.size()+1); + } + _symbolTableLocalsStartIndex = 0; + _symbolTableGlobalsStartIndex = _file.localSymbols.size(); + _symbolTableUndefinesStartIndex = _symbolTableGlobalsStartIndex + + _file.globalSymbols.size(); + + _indirectSymbolTableCount = 0; + for (const Section § : _file.sections) { + _indirectSymbolTableCount += sect.indirectSymbols.size(); + } +} + +void MachOFileLayout::computeFunctionStartsSize() { + _functionStartsSize = _file.functionStarts.size(); +} + +void MachOFileLayout::computeDataInCodeSize() { + _dataInCodeSize = _file.dataInCode.size() * sizeof(data_in_code_entry); +} + +void MachOFileLayout::writeLinkEditContent() { + if (_file.fileType == llvm::MachO::MH_OBJECT) { + writeRelocations(); + writeFunctionStartsInfo(); + writeDataInCodeInfo(); + writeSymbolTable(); + } else { + writeRebaseInfo(); + writeBindingInfo(); + writeLazyBindingInfo(); + // TODO: add weak binding info + writeExportInfo(); + writeFunctionStartsInfo(); + writeDataInCodeInfo(); + writeSymbolTable(); + } +} + +llvm::Error MachOFileLayout::writeBinary(StringRef path) { + // Check for pending error from constructor. + if (_ec) + return llvm::errorCodeToError(_ec); + // Create FileOutputBuffer with calculated size. + unsigned flags = 0; + if (_file.fileType != llvm::MachO::MH_OBJECT) + flags = llvm::FileOutputBuffer::F_executable; + ErrorOr<std::unique_ptr<llvm::FileOutputBuffer>> fobOrErr = + llvm::FileOutputBuffer::create(path, size(), flags); + if (std::error_code ec = fobOrErr.getError()) + return llvm::errorCodeToError(ec); + std::unique_ptr<llvm::FileOutputBuffer> &fob = *fobOrErr; + // Write content. 
+ _buffer = fob->getBufferStart(); + writeMachHeader(); + if (auto ec = writeLoadCommands()) + return ec; + writeSectionContent(); + writeLinkEditContent(); + fob->commit(); + + return llvm::Error(); +} + +/// Takes in-memory normalized view and writes a mach-o object file. +llvm::Error writeBinary(const NormalizedFile &file, StringRef path) { + MachOFileLayout layout(file); + return layout.writeBinary(path); +} + +} // namespace normalized +} // namespace mach_o +} // namespace lld diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp new file mode 100644 index 00000000000..4775c75f721 --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp @@ -0,0 +1,1425 @@ +//===- lib/ReaderWriter/MachO/MachONormalizedFileFromAtoms.cpp ------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +/// +/// \file Converts from in-memory Atoms to in-memory normalized mach-o. 
+/// +/// +------------+ +/// | normalized | +/// +------------+ +/// ^ +/// | +/// | +/// +-------+ +/// | Atoms | +/// +-------+ + +#include "MachONormalizedFile.h" +#include "ArchHandler.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "lld/Core/Error.h" +#include "lld/Core/LLVM.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MachO.h" +#include <map> +#include <system_error> + +using llvm::StringRef; +using llvm::isa; +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; +using namespace lld; + +namespace { + +struct AtomInfo { + const DefinedAtom *atom; + uint64_t offsetInSection; +}; + +struct SectionInfo { + SectionInfo(StringRef seg, StringRef sect, SectionType type, + const MachOLinkingContext &ctxt, uint32_t attr, + bool relocsToDefinedCanBeImplicit); + + StringRef segmentName; + StringRef sectionName; + SectionType type; + uint32_t attributes; + uint64_t address; + uint64_t size; + uint16_t alignment; + + /// If this is set, the any relocs in this section which point to defined + /// addresses can be implicitly generated. This is the case for the + /// __eh_frame section where references to the function can be implicit if the + /// function is defined. 
+ bool relocsToDefinedCanBeImplicit; + + + std::vector<AtomInfo> atomsAndOffsets; + uint32_t normalizedSectionIndex; + uint32_t finalSectionIndex; +}; + +SectionInfo::SectionInfo(StringRef sg, StringRef sct, SectionType t, + const MachOLinkingContext &ctxt, uint32_t attrs, + bool relocsToDefinedCanBeImplicit) + : segmentName(sg), sectionName(sct), type(t), attributes(attrs), + address(0), size(0), alignment(1), + relocsToDefinedCanBeImplicit(relocsToDefinedCanBeImplicit), + normalizedSectionIndex(0), finalSectionIndex(0) { + uint16_t align = 1; + if (ctxt.sectionAligned(segmentName, sectionName, align)) { + alignment = align; + } +} + +struct SegmentInfo { + SegmentInfo(StringRef name); + + StringRef name; + uint64_t address; + uint64_t size; + uint32_t init_access; + uint32_t max_access; + std::vector<SectionInfo*> sections; + uint32_t normalizedSegmentIndex; +}; + +SegmentInfo::SegmentInfo(StringRef n) + : name(n), address(0), size(0), init_access(0), max_access(0), + normalizedSegmentIndex(0) { +} + +class Util { +public: + Util(const MachOLinkingContext &ctxt) + : _ctx(ctxt), _archHandler(ctxt.archHandler()), _entryAtom(nullptr), + _hasTLVDescriptors(false), _subsectionsViaSymbols(true) {} + ~Util(); + + void processDefinedAtoms(const lld::File &atomFile); + void processAtomAttributes(const DefinedAtom *atom); + void assignAtomToSection(const DefinedAtom *atom); + void organizeSections(); + void assignAddressesToSections(const NormalizedFile &file); + uint32_t fileFlags(); + void copySegmentInfo(NormalizedFile &file); + void copySectionInfo(NormalizedFile &file); + void updateSectionInfo(NormalizedFile &file); + void buildAtomToAddressMap(); + llvm::Error addSymbols(const lld::File &atomFile, NormalizedFile &file); + void addIndirectSymbols(const lld::File &atomFile, NormalizedFile &file); + void addRebaseAndBindingInfo(const lld::File &, NormalizedFile &file); + void addExportInfo(const lld::File &, NormalizedFile &file); + void addSectionRelocs(const 
lld::File &, NormalizedFile &file); + void addFunctionStarts(const lld::File &, NormalizedFile &file); + void buildDataInCodeArray(const lld::File &, NormalizedFile &file); + void addDependentDylibs(const lld::File &, NormalizedFile &file); + void copyEntryPointAddress(NormalizedFile &file); + void copySectionContent(NormalizedFile &file); + + bool allSourceFilesHaveMinVersions() const { + return _allSourceFilesHaveMinVersions; + } + + uint32_t minVersion() const { + return _minVersion; + } + + LoadCommandType minVersionCommandType() const { + return _minVersionCommandType; + } + +private: + typedef std::map<DefinedAtom::ContentType, SectionInfo*> TypeToSection; + typedef llvm::DenseMap<const Atom*, uint64_t> AtomToAddress; + + struct DylibInfo { int ordinal; bool hasWeak; bool hasNonWeak; }; + typedef llvm::StringMap<DylibInfo> DylibPathToInfo; + + SectionInfo *sectionForAtom(const DefinedAtom*); + SectionInfo *getRelocatableSection(DefinedAtom::ContentType type); + SectionInfo *getFinalSection(DefinedAtom::ContentType type); + void appendAtom(SectionInfo *sect, const DefinedAtom *atom); + SegmentInfo *segmentForName(StringRef segName); + void layoutSectionsInSegment(SegmentInfo *seg, uint64_t &addr); + void layoutSectionsInTextSegment(size_t, SegmentInfo *, uint64_t &); + void copySectionContent(SectionInfo *si, ContentBytes &content); + uint16_t descBits(const DefinedAtom* atom); + int dylibOrdinal(const SharedLibraryAtom *sa); + void segIndexForSection(const SectionInfo *sect, + uint8_t &segmentIndex, uint64_t &segmentStartAddr); + const Atom *targetOfLazyPointer(const DefinedAtom *lpAtom); + const Atom *targetOfStub(const DefinedAtom *stubAtom); + llvm::Error getSymbolTableRegion(const DefinedAtom* atom, + bool &inGlobalsRegion, + SymbolScope &symbolScope); + void appendSection(SectionInfo *si, NormalizedFile &file); + uint32_t sectionIndexForAtom(const Atom *atom); + + typedef llvm::DenseMap<const Atom*, uint32_t> AtomToIndex; + struct AtomAndIndex { const 
Atom *atom; uint32_t index; SymbolScope scope; }; + struct AtomSorter { + bool operator()(const AtomAndIndex &left, const AtomAndIndex &right); + }; + struct SegmentSorter { + bool operator()(const SegmentInfo *left, const SegmentInfo *right); + static unsigned weight(const SegmentInfo *); + }; + struct TextSectionSorter { + bool operator()(const SectionInfo *left, const SectionInfo *right); + static unsigned weight(const SectionInfo *); + }; + + const MachOLinkingContext &_ctx; + mach_o::ArchHandler &_archHandler; + llvm::BumpPtrAllocator _allocator; + std::vector<SectionInfo*> _sectionInfos; + std::vector<SegmentInfo*> _segmentInfos; + TypeToSection _sectionMap; + std::vector<SectionInfo*> _customSections; + AtomToAddress _atomToAddress; + DylibPathToInfo _dylibInfo; + const DefinedAtom *_entryAtom; + AtomToIndex _atomToSymbolIndex; + std::vector<const Atom *> _machHeaderAliasAtoms; + bool _hasTLVDescriptors; + bool _subsectionsViaSymbols; + bool _allSourceFilesHaveMinVersions = true; + LoadCommandType _minVersionCommandType = (LoadCommandType)0; + uint32_t _minVersion = 0; +}; + +Util::~Util() { + // The SectionInfo structs are BumpPtr allocated, but atomsAndOffsets needs + // to be deleted. + for (SectionInfo *si : _sectionInfos) { + // clear() destroys vector elements, but does not deallocate. + // Instead use swap() to deallocate vector buffer. + std::vector<AtomInfo> empty; + si->atomsAndOffsets.swap(empty); + } + // The SegmentInfo structs are BumpPtr allocated, but sections needs + // to be deleted. + for (SegmentInfo *sgi : _segmentInfos) { + std::vector<SectionInfo*> empty2; + sgi->sections.swap(empty2); + } +} + +SectionInfo *Util::getRelocatableSection(DefinedAtom::ContentType type) { + StringRef segmentName; + StringRef sectionName; + SectionType sectionType; + SectionAttr sectionAttrs; + bool relocsToDefinedCanBeImplicit; + + // Use same table used by when parsing .o files. 
+ relocatableSectionInfoForContentType(type, segmentName, sectionName, + sectionType, sectionAttrs, + relocsToDefinedCanBeImplicit); + // If we already have a SectionInfo with this name, re-use it. + // This can happen if two ContentType map to the same mach-o section. + for (auto sect : _sectionMap) { + if (sect.second->sectionName.equals(sectionName) && + sect.second->segmentName.equals(segmentName)) { + return sect.second; + } + } + // Otherwise allocate new SectionInfo object. + auto *sect = new (_allocator) + SectionInfo(segmentName, sectionName, sectionType, _ctx, sectionAttrs, + relocsToDefinedCanBeImplicit); + _sectionInfos.push_back(sect); + _sectionMap[type] = sect; + return sect; +} + +#define ENTRY(seg, sect, type, atomType) \ + {seg, sect, type, DefinedAtom::atomType } + +struct MachOFinalSectionFromAtomType { + StringRef segmentName; + StringRef sectionName; + SectionType sectionType; + DefinedAtom::ContentType atomType; +}; + +const MachOFinalSectionFromAtomType sectsToAtomType[] = { + ENTRY("__TEXT", "__text", S_REGULAR, typeCode), + ENTRY("__TEXT", "__text", S_REGULAR, typeMachHeader), + ENTRY("__TEXT", "__cstring", S_CSTRING_LITERALS, typeCString), + ENTRY("__TEXT", "__ustring", S_REGULAR, typeUTF16String), + ENTRY("__TEXT", "__const", S_REGULAR, typeConstant), + ENTRY("__TEXT", "__const", S_4BYTE_LITERALS, typeLiteral4), + ENTRY("__TEXT", "__const", S_8BYTE_LITERALS, typeLiteral8), + ENTRY("__TEXT", "__const", S_16BYTE_LITERALS, typeLiteral16), + ENTRY("__TEXT", "__stubs", S_SYMBOL_STUBS, typeStub), + ENTRY("__TEXT", "__stub_helper", S_REGULAR, typeStubHelper), + ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR, typeLSDA), + ENTRY("__TEXT", "__eh_frame", S_COALESCED, typeCFI), + ENTRY("__TEXT", "__unwind_info", S_REGULAR, typeProcessedUnwindInfo), + ENTRY("__DATA", "__data", S_REGULAR, typeData), + ENTRY("__DATA", "__const", S_REGULAR, typeConstData), + ENTRY("__DATA", "__cfstring", S_REGULAR, typeCFString), + ENTRY("__DATA", "__la_symbol_ptr", 
S_LAZY_SYMBOL_POINTERS, + typeLazyPointer), + ENTRY("__DATA", "__mod_init_func", S_MOD_INIT_FUNC_POINTERS, + typeInitializerPtr), + ENTRY("__DATA", "__mod_term_func", S_MOD_TERM_FUNC_POINTERS, + typeTerminatorPtr), + ENTRY("__DATA", "__got", S_NON_LAZY_SYMBOL_POINTERS, + typeGOT), + ENTRY("__DATA", "__nl_symbol_ptr", S_NON_LAZY_SYMBOL_POINTERS, + typeNonLazyPointer), + ENTRY("__DATA", "__thread_vars", S_THREAD_LOCAL_VARIABLES, + typeThunkTLV), + ENTRY("__DATA", "__thread_data", S_THREAD_LOCAL_REGULAR, + typeTLVInitialData), + ENTRY("__DATA", "__thread_ptrs", S_THREAD_LOCAL_VARIABLE_POINTERS, + typeTLVInitializerPtr), + ENTRY("__DATA", "__thread_bss", S_THREAD_LOCAL_ZEROFILL, + typeTLVInitialZeroFill), + ENTRY("__DATA", "__bss", S_ZEROFILL, typeZeroFill), + ENTRY("__DATA", "__interposing", S_INTERPOSING, typeInterposingTuples), +}; +#undef ENTRY + +SectionInfo *Util::getFinalSection(DefinedAtom::ContentType atomType) { + for (auto &p : sectsToAtomType) { + if (p.atomType != atomType) + continue; + SectionAttr sectionAttrs = 0; + switch (atomType) { + case DefinedAtom::typeMachHeader: + case DefinedAtom::typeCode: + case DefinedAtom::typeStub: + case DefinedAtom::typeStubHelper: + sectionAttrs = S_ATTR_PURE_INSTRUCTIONS | S_ATTR_SOME_INSTRUCTIONS; + break; + case DefinedAtom::typeThunkTLV: + _hasTLVDescriptors = true; + break; + default: + break; + } + // If we already have a SectionInfo with this name, re-use it. + // This can happen if two ContentType map to the same mach-o section. + for (auto sect : _sectionMap) { + if (sect.second->sectionName.equals(p.sectionName) && + sect.second->segmentName.equals(p.segmentName)) { + return sect.second; + } + } + // Otherwise allocate new SectionInfo object. 
+ auto *sect = new (_allocator) SectionInfo( + p.segmentName, p.sectionName, p.sectionType, _ctx, sectionAttrs, + /* relocsToDefinedCanBeImplicit */ false); + _sectionInfos.push_back(sect); + _sectionMap[atomType] = sect; + return sect; + } + llvm_unreachable("content type not yet supported"); +} + +SectionInfo *Util::sectionForAtom(const DefinedAtom *atom) { + if (atom->sectionChoice() == DefinedAtom::sectionBasedOnContent) { + // Section for this atom is derived from content type. + DefinedAtom::ContentType type = atom->contentType(); + auto pos = _sectionMap.find(type); + if ( pos != _sectionMap.end() ) + return pos->second; + bool rMode = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT); + return rMode ? getRelocatableSection(type) : getFinalSection(type); + } else { + // This atom needs to be in a custom section. + StringRef customName = atom->customSectionName(); + // Look to see if we have already allocated the needed custom section. + for(SectionInfo *sect : _customSections) { + const DefinedAtom *firstAtom = sect->atomsAndOffsets.front().atom; + if (firstAtom->customSectionName().equals(customName)) { + return sect; + } + } + // Not found, so need to create a new custom section. + size_t seperatorIndex = customName.find('/'); + assert(seperatorIndex != StringRef::npos); + StringRef segName = customName.slice(0, seperatorIndex); + StringRef sectName = customName.drop_front(seperatorIndex + 1); + auto *sect = + new (_allocator) SectionInfo(segName, sectName, S_REGULAR, _ctx, + 0, /* relocsToDefinedCanBeImplicit */ false); + _customSections.push_back(sect); + _sectionInfos.push_back(sect); + return sect; + } +} + +void Util::appendAtom(SectionInfo *sect, const DefinedAtom *atom) { + // Figure out offset for atom in this section given alignment constraints. 
+ uint64_t offset = sect->size; + DefinedAtom::Alignment atomAlign = atom->alignment(); + uint64_t align = atomAlign.value; + uint64_t requiredModulus = atomAlign.modulus; + uint64_t currentModulus = (offset % align); + if ( currentModulus != requiredModulus ) { + if ( requiredModulus > currentModulus ) + offset += requiredModulus-currentModulus; + else + offset += align+requiredModulus-currentModulus; + } + // Record max alignment of any atom in this section. + if (align > sect->alignment) + sect->alignment = atomAlign.value; + // Assign atom to this section with this offset. + AtomInfo ai = {atom, offset}; + sect->atomsAndOffsets.push_back(ai); + // Update section size to include this atom. + sect->size = offset + atom->size(); +} + +void Util::processDefinedAtoms(const lld::File &atomFile) { + for (const DefinedAtom *atom : atomFile.defined()) { + processAtomAttributes(atom); + assignAtomToSection(atom); + } +} + +void Util::processAtomAttributes(const DefinedAtom *atom) { + if (auto *machoFile = dyn_cast<mach_o::MachOFile>(&atom->file())) { + // If the file doesn't use subsections via symbols, then make sure we don't + // add that flag to the final output file if we have a relocatable file. + if (!machoFile->subsectionsViaSymbols()) + _subsectionsViaSymbols = false; + + // All the source files must have min versions for us to output an object + // file with a min version. + if (auto v = machoFile->minVersion()) + _minVersion = std::max(_minVersion, v); + else + _allSourceFilesHaveMinVersions = false; + + // If we don't have a platform load command, but one of the source files + // does, then take the one from the file. + if (!_minVersionCommandType) + if (auto v = machoFile->minVersionLoadCommandKind()) + _minVersionCommandType = v; + } +} + +void Util::assignAtomToSection(const DefinedAtom *atom) { + if (atom->contentType() == DefinedAtom::typeMachHeader) { + _machHeaderAliasAtoms.push_back(atom); + // Assign atom to this section with this offset. 
+ AtomInfo ai = {atom, 0}; + sectionForAtom(atom)->atomsAndOffsets.push_back(ai); + } else if (atom->contentType() == DefinedAtom::typeDSOHandle) + _machHeaderAliasAtoms.push_back(atom); + else + appendAtom(sectionForAtom(atom), atom); +} + +SegmentInfo *Util::segmentForName(StringRef segName) { + for (SegmentInfo *si : _segmentInfos) { + if ( si->name.equals(segName) ) + return si; + } + auto *info = new (_allocator) SegmentInfo(segName); + + // Set the initial segment protection. + if (segName.equals("__TEXT")) + info->init_access = VM_PROT_READ | VM_PROT_EXECUTE; + else if (segName.equals("__PAGEZERO")) + info->init_access = 0; + else if (segName.equals("__LINKEDIT")) + info->init_access = VM_PROT_READ; + else { + // All others default to read-write + info->init_access = VM_PROT_READ | VM_PROT_WRITE; + } + + // Set max segment protection + // Note, its overkill to use a switch statement here, but makes it so much + // easier to use switch coverage to catch new cases. + switch (_ctx.os()) { + case lld::MachOLinkingContext::OS::unknown: + case lld::MachOLinkingContext::OS::macOSX: + case lld::MachOLinkingContext::OS::iOS_simulator: + if (segName.equals("__PAGEZERO")) { + info->max_access = 0; + break; + } + // All others default to all + info->max_access = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE; + break; + case lld::MachOLinkingContext::OS::iOS: + // iPhoneOS always uses same protection for max and initial + info->max_access = info->init_access; + break; + } + _segmentInfos.push_back(info); + return info; +} + +unsigned Util::SegmentSorter::weight(const SegmentInfo *seg) { + return llvm::StringSwitch<unsigned>(seg->name) + .Case("__PAGEZERO", 1) + .Case("__TEXT", 2) + .Case("__DATA", 3) + .Default(100); +} + +bool Util::SegmentSorter::operator()(const SegmentInfo *left, + const SegmentInfo *right) { + return (weight(left) < weight(right)); +} + +unsigned Util::TextSectionSorter::weight(const SectionInfo *sect) { + return 
llvm::StringSwitch<unsigned>(sect->sectionName) + .Case("__text", 1) + .Case("__stubs", 2) + .Case("__stub_helper", 3) + .Case("__const", 4) + .Case("__cstring", 5) + .Case("__unwind_info", 98) + .Case("__eh_frame", 99) + .Default(10); +} + +bool Util::TextSectionSorter::operator()(const SectionInfo *left, + const SectionInfo *right) { + return (weight(left) < weight(right)); +} + +void Util::organizeSections() { + // NOTE!: Keep this in sync with assignAddressesToSections. + switch (_ctx.outputMachOType()) { + case llvm::MachO::MH_EXECUTE: + // Main executables, need a zero-page segment + segmentForName("__PAGEZERO"); + // Fall into next case. + case llvm::MachO::MH_DYLIB: + case llvm::MachO::MH_BUNDLE: + // All dynamic code needs TEXT segment to hold the load commands. + segmentForName("__TEXT"); + break; + default: + break; + } + segmentForName("__LINKEDIT"); + + // Group sections into segments. + for (SectionInfo *si : _sectionInfos) { + SegmentInfo *seg = segmentForName(si->segmentName); + seg->sections.push_back(si); + } + // Sort segments. + std::sort(_segmentInfos.begin(), _segmentInfos.end(), SegmentSorter()); + + // Sort sections within segments. + for (SegmentInfo *seg : _segmentInfos) { + if (seg->name.equals("__TEXT")) { + std::sort(seg->sections.begin(), seg->sections.end(), + TextSectionSorter()); + } + } + + // Record final section indexes. 
+ uint32_t segmentIndex = 0; + uint32_t sectionIndex = 1; + for (SegmentInfo *seg : _segmentInfos) { + seg->normalizedSegmentIndex = segmentIndex++; + for (SectionInfo *sect : seg->sections) + sect->finalSectionIndex = sectionIndex++; + } +} + +void Util::layoutSectionsInSegment(SegmentInfo *seg, uint64_t &addr) { + seg->address = addr; + for (SectionInfo *sect : seg->sections) { + sect->address = llvm::alignTo(addr, sect->alignment); + addr = sect->address + sect->size; + } + seg->size = llvm::alignTo(addr - seg->address, _ctx.pageSize()); +} + +// __TEXT segment lays out backwards so padding is at front after load commands. +void Util::layoutSectionsInTextSegment(size_t hlcSize, SegmentInfo *seg, + uint64_t &addr) { + seg->address = addr; + // Walks sections starting at end to calculate padding for start. + int64_t taddr = 0; + for (auto it = seg->sections.rbegin(); it != seg->sections.rend(); ++it) { + SectionInfo *sect = *it; + taddr -= sect->size; + taddr = taddr & (0 - sect->alignment); + } + int64_t padding = taddr - hlcSize; + while (padding < 0) + padding += _ctx.pageSize(); + // Start assigning section address starting at padded offset. + addr += (padding + hlcSize); + for (SectionInfo *sect : seg->sections) { + sect->address = llvm::alignTo(addr, sect->alignment); + addr = sect->address + sect->size; + } + seg->size = llvm::alignTo(addr - seg->address, _ctx.pageSize()); +} + +void Util::assignAddressesToSections(const NormalizedFile &file) { + // NOTE!: Keep this in sync with organizeSections. + size_t hlcSize = headerAndLoadCommandsSize(file); + uint64_t address = 0; + for (SegmentInfo *seg : _segmentInfos) { + if (seg->name.equals("__PAGEZERO")) { + seg->size = _ctx.pageZeroSize(); + address += seg->size; + } + else if (seg->name.equals("__TEXT")) { + // _ctx.baseAddress() == 0 implies it was either unspecified or + // pageZeroSize is also 0. In either case resetting address is safe. + address = _ctx.baseAddress() ? 
_ctx.baseAddress() : address; + layoutSectionsInTextSegment(hlcSize, seg, address); + } else + layoutSectionsInSegment(seg, address); + + address = llvm::alignTo(address, _ctx.pageSize()); + } + DEBUG_WITH_TYPE("WriterMachO-norm", + llvm::dbgs() << "assignAddressesToSections()\n"; + for (SegmentInfo *sgi : _segmentInfos) { + llvm::dbgs() << " address=" << llvm::format("0x%08llX", sgi->address) + << ", size=" << llvm::format("0x%08llX", sgi->size) + << ", segment-name='" << sgi->name + << "'\n"; + for (SectionInfo *si : sgi->sections) { + llvm::dbgs()<< " addr=" << llvm::format("0x%08llX", si->address) + << ", size=" << llvm::format("0x%08llX", si->size) + << ", section-name='" << si->sectionName + << "\n"; + } + } + ); +} + +void Util::copySegmentInfo(NormalizedFile &file) { + for (SegmentInfo *sgi : _segmentInfos) { + Segment seg; + seg.name = sgi->name; + seg.address = sgi->address; + seg.size = sgi->size; + seg.init_access = sgi->init_access; + seg.max_access = sgi->max_access; + file.segments.push_back(seg); + } +} + +void Util::appendSection(SectionInfo *si, NormalizedFile &file) { + // Add new empty section to end of file.sections. + Section temp; + file.sections.push_back(std::move(temp)); + Section* normSect = &file.sections.back(); + // Copy fields to normalized section. + normSect->segmentName = si->segmentName; + normSect->sectionName = si->sectionName; + normSect->type = si->type; + normSect->attributes = si->attributes; + normSect->address = si->address; + normSect->alignment = si->alignment; + // Record where normalized section is. + si->normalizedSectionIndex = file.sections.size()-1; +} + +void Util::copySectionContent(NormalizedFile &file) { + const bool r = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT); + + // Utility function for ArchHandler to find address of atom in output file. 
+ auto addrForAtom = [&] (const Atom &atom) -> uint64_t { + auto pos = _atomToAddress.find(&atom); + assert(pos != _atomToAddress.end()); + return pos->second; + }; + + auto sectionAddrForAtom = [&] (const Atom &atom) -> uint64_t { + for (const SectionInfo *sectInfo : _sectionInfos) + for (const AtomInfo &atomInfo : sectInfo->atomsAndOffsets) + if (atomInfo.atom == &atom) + return sectInfo->address; + llvm_unreachable("atom not assigned to section"); + }; + + for (SectionInfo *si : _sectionInfos) { + Section *normSect = &file.sections[si->normalizedSectionIndex]; + if (isZeroFillSection(si->type)) { + const uint8_t *empty = nullptr; + normSect->content = llvm::makeArrayRef(empty, si->size); + continue; + } + // Copy content from atoms to content buffer for section. + llvm::MutableArrayRef<uint8_t> sectionContent; + if (si->size) { + uint8_t *sectContent = file.ownedAllocations.Allocate<uint8_t>(si->size); + sectionContent = llvm::MutableArrayRef<uint8_t>(sectContent, si->size); + normSect->content = sectionContent; + } + for (AtomInfo &ai : si->atomsAndOffsets) { + if (!ai.atom->size()) { + assert(ai.atom->begin() == ai.atom->end() && + "Cannot have references without content"); + continue; + } + auto atomContent = sectionContent.slice(ai.offsetInSection, + ai.atom->size()); + _archHandler.generateAtomContent(*ai.atom, r, addrForAtom, + sectionAddrForAtom, _ctx.baseAddress(), + atomContent); + } + } +} + +void Util::copySectionInfo(NormalizedFile &file) { + file.sections.reserve(_sectionInfos.size()); + // Write sections grouped by segment. + for (SegmentInfo *sgi : _segmentInfos) { + for (SectionInfo *si : sgi->sections) { + appendSection(si, file); + } + } +} + +void Util::updateSectionInfo(NormalizedFile &file) { + file.sections.reserve(_sectionInfos.size()); + // sections grouped by segment. 
+ for (SegmentInfo *sgi : _segmentInfos) { + Segment *normSeg = &file.segments[sgi->normalizedSegmentIndex]; + normSeg->address = sgi->address; + normSeg->size = sgi->size; + for (SectionInfo *si : sgi->sections) { + Section *normSect = &file.sections[si->normalizedSectionIndex]; + normSect->address = si->address; + } + } +} + +void Util::copyEntryPointAddress(NormalizedFile &nFile) { + if (!_entryAtom) { + nFile.entryAddress = 0; + return; + } + + if (_ctx.outputTypeHasEntry()) { + if (_archHandler.isThumbFunction(*_entryAtom)) + nFile.entryAddress = (_atomToAddress[_entryAtom] | 1); + else + nFile.entryAddress = _atomToAddress[_entryAtom]; + } +} + +void Util::buildAtomToAddressMap() { + DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs() + << "assign atom addresses:\n"); + const bool lookForEntry = _ctx.outputTypeHasEntry(); + for (SectionInfo *sect : _sectionInfos) { + for (const AtomInfo &info : sect->atomsAndOffsets) { + _atomToAddress[info.atom] = sect->address + info.offsetInSection; + if (lookForEntry && (info.atom->contentType() == DefinedAtom::typeCode) && + (info.atom->size() != 0) && + info.atom->name() == _ctx.entrySymbolName()) { + _entryAtom = info.atom; + } + DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs() + << " address=" + << llvm::format("0x%016X", _atomToAddress[info.atom]) + << llvm::format(" 0x%09lX", info.atom) + << ", file=#" + << info.atom->file().ordinal() + << ", atom=#" + << info.atom->ordinal() + << ", name=" + << info.atom->name() + << ", type=" + << info.atom->contentType() + << "\n"); + } + } + DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs() + << "assign header alias atom addresses:\n"); + for (const Atom *atom : _machHeaderAliasAtoms) { + _atomToAddress[atom] = _ctx.baseAddress(); +#ifndef NDEBUG + if (auto *definedAtom = dyn_cast<DefinedAtom>(atom)) { + DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs() + << " address=" + << llvm::format("0x%016X", _atomToAddress[atom]) + << llvm::format(" 0x%09lX", atom) + << ", file=#" 
+ << definedAtom->file().ordinal() + << ", atom=#" + << definedAtom->ordinal() + << ", name=" + << definedAtom->name() + << ", type=" + << definedAtom->contentType() + << "\n"); + } else { + DEBUG_WITH_TYPE("WriterMachO-address", llvm::dbgs() + << " address=" + << llvm::format("0x%016X", _atomToAddress[atom]) + << " atom=" << atom + << " name=" << atom->name() << "\n"); + } +#endif + } +} + +uint16_t Util::descBits(const DefinedAtom* atom) { + uint16_t desc = 0; + switch (atom->merge()) { + case lld::DefinedAtom::mergeNo: + case lld::DefinedAtom::mergeAsTentative: + break; + case lld::DefinedAtom::mergeAsWeak: + case lld::DefinedAtom::mergeAsWeakAndAddressUsed: + desc |= N_WEAK_DEF; + break; + case lld::DefinedAtom::mergeSameNameAndSize: + case lld::DefinedAtom::mergeByLargestSection: + case lld::DefinedAtom::mergeByContent: + llvm_unreachable("Unsupported DefinedAtom::merge()"); + break; + } + if (atom->contentType() == lld::DefinedAtom::typeResolver) + desc |= N_SYMBOL_RESOLVER; + if (atom->contentType() == lld::DefinedAtom::typeMachHeader) + desc |= REFERENCED_DYNAMICALLY; + if (_archHandler.isThumbFunction(*atom)) + desc |= N_ARM_THUMB_DEF; + if (atom->deadStrip() == DefinedAtom::deadStripNever) { + if ((atom->contentType() != DefinedAtom::typeInitializerPtr) + && (atom->contentType() != DefinedAtom::typeTerminatorPtr)) + desc |= N_NO_DEAD_STRIP; + } + return desc; +} + +bool Util::AtomSorter::operator()(const AtomAndIndex &left, + const AtomAndIndex &right) { + return (left.atom->name().compare(right.atom->name()) < 0); +} + +llvm::Error Util::getSymbolTableRegion(const DefinedAtom* atom, + bool &inGlobalsRegion, + SymbolScope &scope) { + bool rMode = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT); + switch (atom->scope()) { + case Atom::scopeTranslationUnit: + scope = 0; + inGlobalsRegion = false; + return llvm::Error(); + case Atom::scopeLinkageUnit: + if ((_ctx.exportMode() == MachOLinkingContext::ExportMode::whiteList) && + 
_ctx.exportSymbolNamed(atom->name())) { + return llvm::make_error<GenericError>( + Twine("cannot export hidden symbol ") + atom->name()); + } + if (rMode) { + if (_ctx.keepPrivateExterns()) { + // -keep_private_externs means keep in globals region as N_PEXT. + scope = N_PEXT | N_EXT; + inGlobalsRegion = true; + return llvm::Error(); + } + } + // scopeLinkageUnit symbols are no longer global once linked. + scope = N_PEXT; + inGlobalsRegion = false; + return llvm::Error(); + case Atom::scopeGlobal: + if (_ctx.exportRestrictMode()) { + if (_ctx.exportSymbolNamed(atom->name())) { + scope = N_EXT; + inGlobalsRegion = true; + return llvm::Error(); + } else { + scope = N_PEXT; + inGlobalsRegion = false; + return llvm::Error(); + } + } else { + scope = N_EXT; + inGlobalsRegion = true; + return llvm::Error(); + } + break; + } + llvm_unreachable("atom->scope() unknown enum value"); +} + +llvm::Error Util::addSymbols(const lld::File &atomFile, + NormalizedFile &file) { + bool rMode = (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT); + // Mach-O symbol table has three regions: locals, globals, undefs. 
+ + // Add all local (non-global) symbols in address order + std::vector<AtomAndIndex> globals; + globals.reserve(512); + for (SectionInfo *sect : _sectionInfos) { + for (const AtomInfo &info : sect->atomsAndOffsets) { + const DefinedAtom *atom = info.atom; + if (!atom->name().empty()) { + SymbolScope symbolScope; + bool inGlobalsRegion; + if (auto ec = getSymbolTableRegion(atom, inGlobalsRegion, symbolScope)){ + return ec; + } + if (inGlobalsRegion) { + AtomAndIndex ai = { atom, sect->finalSectionIndex, symbolScope }; + globals.push_back(ai); + } else { + Symbol sym; + sym.name = atom->name(); + sym.type = N_SECT; + sym.scope = symbolScope; + sym.sect = sect->finalSectionIndex; + sym.desc = descBits(atom); + sym.value = _atomToAddress[atom]; + _atomToSymbolIndex[atom] = file.localSymbols.size(); + file.localSymbols.push_back(sym); + } + } else if (rMode && _archHandler.needsLocalSymbolInRelocatableFile(atom)){ + // Create 'Lxxx' labels for anonymous atoms if archHandler says so. + static unsigned tempNum = 1; + char tmpName[16]; + sprintf(tmpName, "L%04u", tempNum++); + StringRef tempRef(tmpName); + Symbol sym; + sym.name = tempRef.copy(file.ownedAllocations); + sym.type = N_SECT; + sym.scope = 0; + sym.sect = sect->finalSectionIndex; + sym.desc = 0; + sym.value = _atomToAddress[atom]; + _atomToSymbolIndex[atom] = file.localSymbols.size(); + file.localSymbols.push_back(sym); + } + } + } + + // Sort global symbol alphabetically, then add to symbol table. 
+ std::sort(globals.begin(), globals.end(), AtomSorter()); + const uint32_t globalStartIndex = file.localSymbols.size(); + for (AtomAndIndex &ai : globals) { + Symbol sym; + sym.name = ai.atom->name(); + sym.type = N_SECT; + sym.scope = ai.scope; + sym.sect = ai.index; + sym.desc = descBits(static_cast<const DefinedAtom*>(ai.atom)); + sym.value = _atomToAddress[ai.atom]; + _atomToSymbolIndex[ai.atom] = globalStartIndex + file.globalSymbols.size(); + file.globalSymbols.push_back(sym); + } + + // Sort undefined symbol alphabetically, then add to symbol table. + std::vector<AtomAndIndex> undefs; + undefs.reserve(128); + for (const UndefinedAtom *atom : atomFile.undefined()) { + AtomAndIndex ai = { atom, 0, N_EXT }; + undefs.push_back(ai); + } + for (const SharedLibraryAtom *atom : atomFile.sharedLibrary()) { + AtomAndIndex ai = { atom, 0, N_EXT }; + undefs.push_back(ai); + } + std::sort(undefs.begin(), undefs.end(), AtomSorter()); + const uint32_t start = file.globalSymbols.size() + file.localSymbols.size(); + for (AtomAndIndex &ai : undefs) { + Symbol sym; + uint16_t desc = 0; + if (!rMode) { + uint8_t ordinal = 0; + if (!_ctx.useFlatNamespace()) + ordinal = dylibOrdinal(dyn_cast<SharedLibraryAtom>(ai.atom)); + llvm::MachO::SET_LIBRARY_ORDINAL(desc, ordinal); + } + sym.name = ai.atom->name(); + sym.type = N_UNDF; + sym.scope = ai.scope; + sym.sect = 0; + sym.desc = desc; + sym.value = 0; + _atomToSymbolIndex[ai.atom] = file.undefinedSymbols.size() + start; + file.undefinedSymbols.push_back(sym); + } + + return llvm::Error(); +} + +const Atom *Util::targetOfLazyPointer(const DefinedAtom *lpAtom) { + for (const Reference *ref : *lpAtom) { + if (_archHandler.isLazyPointer(*ref)) { + return ref->target(); + } + } + return nullptr; +} + +const Atom *Util::targetOfStub(const DefinedAtom *stubAtom) { + for (const Reference *ref : *stubAtom) { + if (const Atom *ta = ref->target()) { + if (const DefinedAtom *lpAtom = dyn_cast<DefinedAtom>(ta)) { + const Atom *target = 
targetOfLazyPointer(lpAtom); + if (target) + return target; + } + } + } + return nullptr; +} + +void Util::addIndirectSymbols(const lld::File &atomFile, NormalizedFile &file) { + for (SectionInfo *si : _sectionInfos) { + Section &normSect = file.sections[si->normalizedSectionIndex]; + switch (si->type) { + case llvm::MachO::S_NON_LAZY_SYMBOL_POINTERS: + for (const AtomInfo &info : si->atomsAndOffsets) { + bool foundTarget = false; + for (const Reference *ref : *info.atom) { + const Atom *target = ref->target(); + if (target) { + if (isa<const SharedLibraryAtom>(target)) { + uint32_t index = _atomToSymbolIndex[target]; + normSect.indirectSymbols.push_back(index); + foundTarget = true; + } else { + normSect.indirectSymbols.push_back( + llvm::MachO::INDIRECT_SYMBOL_LOCAL); + } + } + } + if (!foundTarget) { + normSect.indirectSymbols.push_back( + llvm::MachO::INDIRECT_SYMBOL_ABS); + } + } + break; + case llvm::MachO::S_LAZY_SYMBOL_POINTERS: + for (const AtomInfo &info : si->atomsAndOffsets) { + const Atom *target = targetOfLazyPointer(info.atom); + if (target) { + uint32_t index = _atomToSymbolIndex[target]; + normSect.indirectSymbols.push_back(index); + } + } + break; + case llvm::MachO::S_SYMBOL_STUBS: + for (const AtomInfo &info : si->atomsAndOffsets) { + const Atom *target = targetOfStub(info.atom); + if (target) { + uint32_t index = _atomToSymbolIndex[target]; + normSect.indirectSymbols.push_back(index); + } + } + break; + default: + break; + } + } +} + +void Util::addDependentDylibs(const lld::File &atomFile,NormalizedFile &nFile) { + // Scan all imported symbols and build up list of dylibs they are from. 
+ int ordinal = 1; + for (const SharedLibraryAtom *slAtom : atomFile.sharedLibrary()) { + StringRef loadPath = slAtom->loadName(); + DylibPathToInfo::iterator pos = _dylibInfo.find(loadPath); + if (pos == _dylibInfo.end()) { + DylibInfo info; + bool flatNamespaceAtom = &slAtom->file() == _ctx.flatNamespaceFile(); + + // If we're in -flat_namespace mode (or this atom came from the flat + // namespace file under -undefined dynamic_lookup) then use the flat + // lookup ordinal. + if (flatNamespaceAtom || _ctx.useFlatNamespace()) + info.ordinal = BIND_SPECIAL_DYLIB_FLAT_LOOKUP; + else + info.ordinal = ordinal++; + info.hasWeak = slAtom->canBeNullAtRuntime(); + info.hasNonWeak = !info.hasWeak; + _dylibInfo[loadPath] = info; + + // Unless this was a flat_namespace atom, record the source dylib. + if (!flatNamespaceAtom) { + DependentDylib depInfo; + depInfo.path = loadPath; + depInfo.kind = llvm::MachO::LC_LOAD_DYLIB; + depInfo.currentVersion = _ctx.dylibCurrentVersion(loadPath); + depInfo.compatVersion = _ctx.dylibCompatVersion(loadPath); + nFile.dependentDylibs.push_back(depInfo); + } + } else { + if ( slAtom->canBeNullAtRuntime() ) + pos->second.hasWeak = true; + else + pos->second.hasNonWeak = true; + } + } + // Automatically weak link dylib in which all symbols are weak (canBeNull). 
+ for (DependentDylib &dep : nFile.dependentDylibs) { + DylibInfo &info = _dylibInfo[dep.path]; + if (info.hasWeak && !info.hasNonWeak) + dep.kind = llvm::MachO::LC_LOAD_WEAK_DYLIB; + else if (_ctx.isUpwardDylib(dep.path)) + dep.kind = llvm::MachO::LC_LOAD_UPWARD_DYLIB; + } +} + +int Util::dylibOrdinal(const SharedLibraryAtom *sa) { + return _dylibInfo[sa->loadName()].ordinal; +} + +void Util::segIndexForSection(const SectionInfo *sect, uint8_t &segmentIndex, + uint64_t &segmentStartAddr) { + segmentIndex = 0; + for (const SegmentInfo *seg : _segmentInfos) { + if ((seg->address <= sect->address) + && (seg->address+seg->size >= sect->address+sect->size)) { + segmentStartAddr = seg->address; + return; + } + ++segmentIndex; + } + llvm_unreachable("section not in any segment"); +} + +uint32_t Util::sectionIndexForAtom(const Atom *atom) { + uint64_t address = _atomToAddress[atom]; + for (const SectionInfo *si : _sectionInfos) { + if ((si->address <= address) && (address < si->address+si->size)) + return si->finalSectionIndex; + } + llvm_unreachable("atom not in any section"); +} + +void Util::addSectionRelocs(const lld::File &, NormalizedFile &file) { + if (_ctx.outputMachOType() != llvm::MachO::MH_OBJECT) + return; + + // Utility function for ArchHandler to find symbol index for an atom. + auto symIndexForAtom = [&] (const Atom &atom) -> uint32_t { + auto pos = _atomToSymbolIndex.find(&atom); + assert(pos != _atomToSymbolIndex.end()); + return pos->second; + }; + + // Utility function for ArchHandler to find section index for an atom. + auto sectIndexForAtom = [&] (const Atom &atom) -> uint32_t { + return sectionIndexForAtom(&atom); + }; + + // Utility function for ArchHandler to find address of atom in output file. 
+ auto addressForAtom = [&] (const Atom &atom) -> uint64_t { + auto pos = _atomToAddress.find(&atom); + assert(pos != _atomToAddress.end()); + return pos->second; + }; + + for (SectionInfo *si : _sectionInfos) { + Section &normSect = file.sections[si->normalizedSectionIndex]; + for (const AtomInfo &info : si->atomsAndOffsets) { + const DefinedAtom *atom = info.atom; + for (const Reference *ref : *atom) { + // Skip emitting relocs for sections which are always able to be + // implicitly regenerated and where the relocation targets an address + // which is defined. + if (si->relocsToDefinedCanBeImplicit && isa<DefinedAtom>(ref->target())) + continue; + _archHandler.appendSectionRelocations(*atom, info.offsetInSection, *ref, + symIndexForAtom, + sectIndexForAtom, + addressForAtom, + normSect.relocations); + } + } + } +} + +void Util::addFunctionStarts(const lld::File &, NormalizedFile &file) { + if (!_ctx.generateFunctionStartsLoadCommand()) + return; + file.functionStarts.reserve(8192); + // Delta compress function starts, starting with the mach header symbol. + const uint64_t badAddress = ~0ULL; + uint64_t addr = badAddress; + for (SectionInfo *si : _sectionInfos) { + for (const AtomInfo &info : si->atomsAndOffsets) { + auto type = info.atom->contentType(); + if (type == DefinedAtom::typeMachHeader) { + addr = _atomToAddress[info.atom]; + continue; + } + if (type != DefinedAtom::typeCode) + continue; + assert(addr != badAddress && "Missing mach header symbol"); + // Skip atoms which have 0 size. This is so that LC_FUNCTION_STARTS + // can't spill in to the next section. 
+ if (!info.atom->size()) + continue; + uint64_t nextAddr = _atomToAddress[info.atom]; + if (_archHandler.isThumbFunction(*info.atom)) + nextAddr |= 1; + uint64_t delta = nextAddr - addr; + if (delta) { + ByteBuffer buffer; + buffer.append_uleb128(delta); + file.functionStarts.insert(file.functionStarts.end(), buffer.bytes(), + buffer.bytes() + buffer.size()); + } + addr = nextAddr; + } + } + + // Null terminate, and pad to pointer size for this arch. + file.functionStarts.push_back(0); + + auto size = file.functionStarts.size(); + for (unsigned i = size, e = llvm::alignTo(size, _ctx.is64Bit() ? 8 : 4); + i != e; ++i) + file.functionStarts.push_back(0); +} + +void Util::buildDataInCodeArray(const lld::File &, NormalizedFile &file) { + if (!_ctx.generateDataInCodeLoadCommand()) + return; + for (SectionInfo *si : _sectionInfos) { + for (const AtomInfo &info : si->atomsAndOffsets) { + // Atoms that contain data-in-code have "transition" references + // which mark a point where the embedded data starts of ends. + // This needs to be converted to the mach-o format which is an array + // of data-in-code ranges. + uint32_t startOffset = 0; + DataRegionType mode = DataRegionType(0); + for (const Reference *ref : *info.atom) { + if (ref->kindNamespace() != Reference::KindNamespace::mach_o) + continue; + if (_archHandler.isDataInCodeTransition(ref->kindValue())) { + DataRegionType nextMode = (DataRegionType)ref->addend(); + if (mode != nextMode) { + if (mode != 0) { + // Found end data range, so make range entry. + DataInCode entry; + entry.offset = si->address + info.offsetInSection + startOffset; + entry.length = ref->offsetInAtom() - startOffset; + entry.kind = mode; + file.dataInCode.push_back(entry); + } + } + mode = nextMode; + startOffset = ref->offsetInAtom(); + } + } + if (mode != 0) { + // Function ends with data (no end transition). 
+ DataInCode entry; + entry.offset = si->address + info.offsetInSection + startOffset; + entry.length = info.atom->size() - startOffset; + entry.kind = mode; + file.dataInCode.push_back(entry); + } + } + } +} + +void Util::addRebaseAndBindingInfo(const lld::File &atomFile, + NormalizedFile &nFile) { + if (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT) + return; + + uint8_t segmentIndex; + uint64_t segmentStartAddr; + for (SectionInfo *sect : _sectionInfos) { + segIndexForSection(sect, segmentIndex, segmentStartAddr); + for (const AtomInfo &info : sect->atomsAndOffsets) { + const DefinedAtom *atom = info.atom; + for (const Reference *ref : *atom) { + uint64_t segmentOffset = _atomToAddress[atom] + ref->offsetInAtom() + - segmentStartAddr; + const Atom* targ = ref->target(); + if (_archHandler.isPointer(*ref)) { + // A pointer to a DefinedAtom requires rebasing. + if (isa<DefinedAtom>(targ)) { + RebaseLocation rebase; + rebase.segIndex = segmentIndex; + rebase.segOffset = segmentOffset; + rebase.kind = llvm::MachO::REBASE_TYPE_POINTER; + nFile.rebasingInfo.push_back(rebase); + } + // A pointer to an SharedLibraryAtom requires binding. 
+ if (const SharedLibraryAtom *sa = dyn_cast<SharedLibraryAtom>(targ)) { + BindLocation bind; + bind.segIndex = segmentIndex; + bind.segOffset = segmentOffset; + bind.kind = llvm::MachO::BIND_TYPE_POINTER; + bind.canBeNull = sa->canBeNullAtRuntime(); + bind.ordinal = dylibOrdinal(sa); + bind.symbolName = targ->name(); + bind.addend = ref->addend(); + nFile.bindingInfo.push_back(bind); + } + } + else if (_archHandler.isLazyPointer(*ref)) { + BindLocation bind; + if (const SharedLibraryAtom *sa = dyn_cast<SharedLibraryAtom>(targ)) { + bind.ordinal = dylibOrdinal(sa); + } else { + bind.ordinal = llvm::MachO::BIND_SPECIAL_DYLIB_SELF; + } + bind.segIndex = segmentIndex; + bind.segOffset = segmentOffset; + bind.kind = llvm::MachO::BIND_TYPE_POINTER; + bind.canBeNull = false; //sa->canBeNullAtRuntime(); + bind.symbolName = targ->name(); + bind.addend = ref->addend(); + nFile.lazyBindingInfo.push_back(bind); + } + } + } + } +} + +void Util::addExportInfo(const lld::File &atomFile, NormalizedFile &nFile) { + if (_ctx.outputMachOType() == llvm::MachO::MH_OBJECT) + return; + + for (SectionInfo *sect : _sectionInfos) { + for (const AtomInfo &info : sect->atomsAndOffsets) { + const DefinedAtom *atom = info.atom; + if (atom->scope() != Atom::scopeGlobal) + continue; + if (_ctx.exportRestrictMode()) { + if (!_ctx.exportSymbolNamed(atom->name())) + continue; + } + Export exprt; + exprt.name = atom->name(); + exprt.offset = _atomToAddress[atom] - _ctx.baseAddress(); + exprt.kind = EXPORT_SYMBOL_FLAGS_KIND_REGULAR; + if (atom->merge() == DefinedAtom::mergeAsWeak) + exprt.flags = EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION; + else + exprt.flags = 0; + exprt.otherOffset = 0; + exprt.otherName = StringRef(); + nFile.exportInfo.push_back(exprt); + } + } +} + +uint32_t Util::fileFlags() { + // FIXME: these need to determined at runtime. + if (_ctx.outputMachOType() == MH_OBJECT) { + return _subsectionsViaSymbols ? 
MH_SUBSECTIONS_VIA_SYMBOLS : 0; + } else { + uint32_t flags = MH_DYLDLINK; + if (!_ctx.useFlatNamespace()) + flags |= MH_TWOLEVEL | MH_NOUNDEFS; + if ((_ctx.outputMachOType() == MH_EXECUTE) && _ctx.PIE()) + flags |= MH_PIE; + if (_hasTLVDescriptors) + flags |= (MH_PIE | MH_HAS_TLV_DESCRIPTORS); + return flags; + } +} + +} // end anonymous namespace + +namespace lld { +namespace mach_o { +namespace normalized { + +/// Convert a set of Atoms into a normalized mach-o file. +llvm::Expected<std::unique_ptr<NormalizedFile>> +normalizedFromAtoms(const lld::File &atomFile, + const MachOLinkingContext &context) { + // The util object buffers info until the normalized file can be made. + Util util(context); + util.processDefinedAtoms(atomFile); + util.organizeSections(); + + std::unique_ptr<NormalizedFile> f(new NormalizedFile()); + NormalizedFile &normFile = *f.get(); + normFile.arch = context.arch(); + normFile.fileType = context.outputMachOType(); + normFile.flags = util.fileFlags(); + normFile.stackSize = context.stackSize(); + normFile.installName = context.installName(); + normFile.currentVersion = context.currentVersion(); + normFile.compatVersion = context.compatibilityVersion(); + normFile.os = context.os(); + + // If we are emitting an object file, then the min version is the maximum + // of the min's of all the source files and the cmdline. 
+ if (normFile.fileType == llvm::MachO::MH_OBJECT) + normFile.minOSverson = std::max(context.osMinVersion(), util.minVersion()); + else + normFile.minOSverson = context.osMinVersion(); + + normFile.minOSVersionKind = util.minVersionCommandType(); + + normFile.sdkVersion = context.sdkVersion(); + normFile.sourceVersion = context.sourceVersion(); + + if (context.generateVersionLoadCommand() && + context.os() != MachOLinkingContext::OS::unknown) + normFile.hasMinVersionLoadCommand = true; + else if (normFile.fileType == llvm::MachO::MH_OBJECT && + util.allSourceFilesHaveMinVersions() && + ((normFile.os != MachOLinkingContext::OS::unknown) || + util.minVersionCommandType())) { + // If we emit an object file, then it should contain a min version load + // command if all of the source files also contained min version commands. + // Also, we either need to have a platform, or found a platform from the + // source object files. + normFile.hasMinVersionLoadCommand = true; + } + normFile.generateDataInCodeLoadCommand = + context.generateDataInCodeLoadCommand(); + normFile.pageSize = context.pageSize(); + normFile.rpaths = context.rpaths(); + util.addDependentDylibs(atomFile, normFile); + util.copySegmentInfo(normFile); + util.copySectionInfo(normFile); + util.assignAddressesToSections(normFile); + util.buildAtomToAddressMap(); + util.updateSectionInfo(normFile); + util.copySectionContent(normFile); + if (auto ec = util.addSymbols(atomFile, normFile)) { + return std::move(ec); + } + util.addIndirectSymbols(atomFile, normFile); + util.addRebaseAndBindingInfo(atomFile, normFile); + util.addExportInfo(atomFile, normFile); + util.addSectionRelocs(atomFile, normFile); + util.addFunctionStarts(atomFile, normFile); + util.buildDataInCodeArray(atomFile, normFile); + util.copyEntryPointAddress(normFile); + + return std::move(f); +} + +} // namespace normalized +} // namespace mach_o +} // namespace lld diff --git 
a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp new file mode 100644 index 00000000000..fc760a3eddd --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp @@ -0,0 +1,1337 @@ +//===- lib/ReaderWriter/MachO/MachONormalizedFileToAtoms.cpp --------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +/// +/// \file Converts from in-memory normalized mach-o to in-memory Atoms. +/// +/// +------------+ +/// | normalized | +/// +------------+ +/// | +/// | +/// v +/// +-------+ +/// | Atoms | +/// +-------+ + +#include "MachONormalizedFile.h" +#include "ArchHandler.h" +#include "Atoms.h" +#include "File.h" +#include "MachONormalizedFileBinaryUtils.h" +#include "lld/Core/Error.h" +#include "lld/Core/LLVM.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MachO.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; + +#define DEBUG_TYPE "normalized-file-to-atoms" + +namespace lld { +namespace mach_o { + + +namespace { // anonymous + + +#define ENTRY(seg, sect, type, atomType) \ + {seg, sect, type, DefinedAtom::atomType } + +struct MachORelocatableSectionToAtomType { + StringRef segmentName; + StringRef sectionName; + SectionType sectionType; + DefinedAtom::ContentType atomType; +}; + +const MachORelocatableSectionToAtomType sectsToAtomType[] = { + ENTRY("__TEXT", "__text", S_REGULAR, typeCode), + ENTRY("__TEXT", "__text", S_REGULAR, typeResolver), + ENTRY("__TEXT", "__cstring", S_CSTRING_LITERALS, typeCString), + ENTRY("", "", S_CSTRING_LITERALS, typeCString), + ENTRY("__TEXT", "__ustring", S_REGULAR, 
typeUTF16String), + ENTRY("__TEXT", "__const", S_REGULAR, typeConstant), + ENTRY("__TEXT", "__const_coal", S_COALESCED, typeConstant), + ENTRY("__TEXT", "__eh_frame", S_COALESCED, typeCFI), + ENTRY("__TEXT", "__eh_frame", S_REGULAR, typeCFI), + ENTRY("__TEXT", "__literal4", S_4BYTE_LITERALS, typeLiteral4), + ENTRY("__TEXT", "__literal8", S_8BYTE_LITERALS, typeLiteral8), + ENTRY("__TEXT", "__literal16", S_16BYTE_LITERALS, typeLiteral16), + ENTRY("__TEXT", "__gcc_except_tab", S_REGULAR, typeLSDA), + ENTRY("__DATA", "__data", S_REGULAR, typeData), + ENTRY("__DATA", "__datacoal_nt", S_COALESCED, typeData), + ENTRY("__DATA", "__const", S_REGULAR, typeConstData), + ENTRY("__DATA", "__cfstring", S_REGULAR, typeCFString), + ENTRY("__DATA", "__mod_init_func", S_MOD_INIT_FUNC_POINTERS, + typeInitializerPtr), + ENTRY("__DATA", "__mod_term_func", S_MOD_TERM_FUNC_POINTERS, + typeTerminatorPtr), + ENTRY("__DATA", "__got", S_NON_LAZY_SYMBOL_POINTERS, + typeGOT), + ENTRY("__DATA", "__bss", S_ZEROFILL, typeZeroFill), + ENTRY("", "", S_NON_LAZY_SYMBOL_POINTERS, + typeGOT), + ENTRY("__DATA", "__interposing", S_INTERPOSING, typeInterposingTuples), + ENTRY("__DATA", "__thread_vars", S_THREAD_LOCAL_VARIABLES, + typeThunkTLV), + ENTRY("__DATA", "__thread_data", S_THREAD_LOCAL_REGULAR, typeTLVInitialData), + ENTRY("__DATA", "__thread_bss", S_THREAD_LOCAL_ZEROFILL, + typeTLVInitialZeroFill), + ENTRY("__DATA", "__objc_imageinfo", S_REGULAR, typeObjCImageInfo), + ENTRY("__DATA", "__objc_catlist", S_REGULAR, typeObjC2CategoryList), + ENTRY("", "", S_INTERPOSING, typeInterposingTuples), + ENTRY("__LD", "__compact_unwind", S_REGULAR, + typeCompactUnwindInfo), + ENTRY("", "", S_REGULAR, typeUnknown) +}; +#undef ENTRY + + +/// Figures out ContentType of a mach-o section. +DefinedAtom::ContentType atomTypeFromSection(const Section §ion, + bool &customSectionName) { + // First look for match of name and type. Empty names in table are wildcards. 
+ customSectionName = false; + for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ; + p->atomType != DefinedAtom::typeUnknown; ++p) { + if (p->sectionType != section.type) + continue; + if (!p->segmentName.equals(section.segmentName) && !p->segmentName.empty()) + continue; + if (!p->sectionName.equals(section.sectionName) && !p->sectionName.empty()) + continue; + customSectionName = p->segmentName.empty() && p->sectionName.empty(); + return p->atomType; + } + // Look for code denoted by section attributes + if (section.attributes & S_ATTR_PURE_INSTRUCTIONS) + return DefinedAtom::typeCode; + + return DefinedAtom::typeUnknown; +} + +enum AtomizeModel { + atomizeAtSymbols, + atomizeFixedSize, + atomizePointerSize, + atomizeUTF8, + atomizeUTF16, + atomizeCFI, + atomizeCU, + atomizeCFString +}; + +/// Returns info on how to atomize a section of the specified ContentType. +void sectionParseInfo(DefinedAtom::ContentType atomType, + unsigned int &sizeMultiple, + DefinedAtom::Scope &scope, + DefinedAtom::Merge &merge, + AtomizeModel &atomizeModel) { + struct ParseInfo { + DefinedAtom::ContentType atomType; + unsigned int sizeMultiple; + DefinedAtom::Scope scope; + DefinedAtom::Merge merge; + AtomizeModel atomizeModel; + }; + + #define ENTRY(type, size, scope, merge, model) \ + {DefinedAtom::type, size, DefinedAtom::scope, DefinedAtom::merge, model } + + static const ParseInfo parseInfo[] = { + ENTRY(typeCode, 1, scopeGlobal, mergeNo, + atomizeAtSymbols), + ENTRY(typeData, 1, scopeGlobal, mergeNo, + atomizeAtSymbols), + ENTRY(typeConstData, 1, scopeGlobal, mergeNo, + atomizeAtSymbols), + ENTRY(typeZeroFill, 1, scopeGlobal, mergeNo, + atomizeAtSymbols), + ENTRY(typeConstant, 1, scopeGlobal, mergeNo, + atomizeAtSymbols), + ENTRY(typeCString, 1, scopeLinkageUnit, mergeByContent, + atomizeUTF8), + ENTRY(typeUTF16String, 1, scopeLinkageUnit, mergeByContent, + atomizeUTF16), + ENTRY(typeCFI, 4, scopeTranslationUnit, mergeNo, + atomizeCFI), + ENTRY(typeLiteral4, 4, 
scopeLinkageUnit, mergeByContent, + atomizeFixedSize), + ENTRY(typeLiteral8, 8, scopeLinkageUnit, mergeByContent, + atomizeFixedSize), + ENTRY(typeLiteral16, 16, scopeLinkageUnit, mergeByContent, + atomizeFixedSize), + ENTRY(typeCFString, 4, scopeLinkageUnit, mergeByContent, + atomizeCFString), + ENTRY(typeInitializerPtr, 4, scopeTranslationUnit, mergeNo, + atomizePointerSize), + ENTRY(typeTerminatorPtr, 4, scopeTranslationUnit, mergeNo, + atomizePointerSize), + ENTRY(typeCompactUnwindInfo, 4, scopeTranslationUnit, mergeNo, + atomizeCU), + ENTRY(typeGOT, 4, scopeLinkageUnit, mergeByContent, + atomizePointerSize), + ENTRY(typeObjC2CategoryList, 4, scopeTranslationUnit, mergeByContent, + atomizePointerSize), + ENTRY(typeUnknown, 1, scopeGlobal, mergeNo, + atomizeAtSymbols) + }; + #undef ENTRY + const int tableLen = sizeof(parseInfo) / sizeof(ParseInfo); + for (int i=0; i < tableLen; ++i) { + if (parseInfo[i].atomType == atomType) { + sizeMultiple = parseInfo[i].sizeMultiple; + scope = parseInfo[i].scope; + merge = parseInfo[i].merge; + atomizeModel = parseInfo[i].atomizeModel; + return; + } + } + + // Unknown type is atomized by symbols. + sizeMultiple = 1; + scope = DefinedAtom::scopeGlobal; + merge = DefinedAtom::mergeNo; + atomizeModel = atomizeAtSymbols; +} + + +Atom::Scope atomScope(uint8_t scope) { + switch (scope) { + case N_EXT: + return Atom::scopeGlobal; + case N_PEXT: + case N_PEXT | N_EXT: + return Atom::scopeLinkageUnit; + case 0: + return Atom::scopeTranslationUnit; + } + llvm_unreachable("unknown scope value!"); +} + +void appendSymbolsInSection(const std::vector<Symbol> &inSymbols, + uint32_t sectionIndex, + SmallVector<const Symbol *, 64> &outSyms) { + for (const Symbol &sym : inSymbols) { + // Only look at definition symbols. 
+ if ((sym.type & N_TYPE) != N_SECT) + continue; + if (sym.sect != sectionIndex) + continue; + outSyms.push_back(&sym); + } +} + +void atomFromSymbol(DefinedAtom::ContentType atomType, const Section §ion, + MachOFile &file, uint64_t symbolAddr, StringRef symbolName, + uint16_t symbolDescFlags, Atom::Scope symbolScope, + uint64_t nextSymbolAddr, bool scatterable, bool copyRefs) { + // Mach-O symbol table does have size in it. Instead the size is the + // difference between this and the next symbol. + uint64_t size = nextSymbolAddr - symbolAddr; + uint64_t offset = symbolAddr - section.address; + bool noDeadStrip = (symbolDescFlags & N_NO_DEAD_STRIP) || !scatterable; + if (isZeroFillSection(section.type)) { + file.addZeroFillDefinedAtom(symbolName, symbolScope, offset, size, + noDeadStrip, copyRefs, §ion); + } else { + DefinedAtom::Merge merge = (symbolDescFlags & N_WEAK_DEF) + ? DefinedAtom::mergeAsWeak : DefinedAtom::mergeNo; + bool thumb = (symbolDescFlags & N_ARM_THUMB_DEF); + if (atomType == DefinedAtom::typeUnknown) { + // Mach-O needs a segment and section name. Concatentate those two + // with a / separator (e.g. "seg/sect") to fit into the lld model + // of just a section name. + std::string segSectName = section.segmentName.str() + + "/" + section.sectionName.str(); + file.addDefinedAtomInCustomSection(symbolName, symbolScope, atomType, + merge, thumb, noDeadStrip, offset, + size, segSectName, true, §ion); + } else { + if ((atomType == lld::DefinedAtom::typeCode) && + (symbolDescFlags & N_SYMBOL_RESOLVER)) { + atomType = lld::DefinedAtom::typeResolver; + } + file.addDefinedAtom(symbolName, symbolScope, atomType, merge, + offset, size, thumb, noDeadStrip, copyRefs, §ion); + } + } +} + +llvm::Error processSymboledSection(DefinedAtom::ContentType atomType, + const Section §ion, + const NormalizedFile &normalizedFile, + MachOFile &file, bool scatterable, + bool copyRefs) { + // Find section's index. 
+ uint32_t sectIndex = 1; + for (auto § : normalizedFile.sections) { + if (§ == §ion) + break; + ++sectIndex; + } + + // Find all symbols in this section. + SmallVector<const Symbol *, 64> symbols; + appendSymbolsInSection(normalizedFile.globalSymbols, sectIndex, symbols); + appendSymbolsInSection(normalizedFile.localSymbols, sectIndex, symbols); + + // Sort symbols. + std::sort(symbols.begin(), symbols.end(), + [](const Symbol *lhs, const Symbol *rhs) -> bool { + if (lhs == rhs) + return false; + // First by address. + uint64_t lhsAddr = lhs->value; + uint64_t rhsAddr = rhs->value; + if (lhsAddr != rhsAddr) + return lhsAddr < rhsAddr; + // If same address, one is an alias so sort by scope. + Atom::Scope lScope = atomScope(lhs->scope); + Atom::Scope rScope = atomScope(rhs->scope); + if (lScope != rScope) + return lScope < rScope; + // If same address and scope, see if one might be better as + // the alias. + bool lPrivate = (lhs->name.front() == 'l'); + bool rPrivate = (rhs->name.front() == 'l'); + if (lPrivate != rPrivate) + return lPrivate; + // If same address and scope, sort by name. + return lhs->name < rhs->name; + }); + + // Debug logging of symbols. + //for (const Symbol *sym : symbols) + // llvm::errs() << " sym: " + // << llvm::format("0x%08llx ", (uint64_t)sym->value) + // << ", " << sym->name << "\n"; + + // If section has no symbols and no content, there are no atoms. + if (symbols.empty() && section.content.empty()) + return llvm::Error(); + + if (symbols.empty()) { + // Section has no symbols, put all content in one anoymous atom. + atomFromSymbol(atomType, section, file, section.address, StringRef(), + 0, Atom::scopeTranslationUnit, + section.address + section.content.size(), + scatterable, copyRefs); + } + else if (symbols.front()->value != section.address) { + // Section has anonymous content before first symbol. 
+ atomFromSymbol(atomType, section, file, section.address, StringRef(), + 0, Atom::scopeTranslationUnit, symbols.front()->value, + scatterable, copyRefs); + } + + const Symbol *lastSym = nullptr; + for (const Symbol *sym : symbols) { + if (lastSym != nullptr) { + // Ignore any assembler added "ltmpNNN" symbol at start of section + // if there is another symbol at the start. + if ((lastSym->value != sym->value) + || lastSym->value != section.address + || !lastSym->name.startswith("ltmp")) { + atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name, + lastSym->desc, atomScope(lastSym->scope), sym->value, + scatterable, copyRefs); + } + } + lastSym = sym; + } + if (lastSym != nullptr) { + atomFromSymbol(atomType, section, file, lastSym->value, lastSym->name, + lastSym->desc, atomScope(lastSym->scope), + section.address + section.content.size(), + scatterable, copyRefs); + } + + // If object built without .subsections_via_symbols, add reference chain. + if (!scatterable) { + MachODefinedAtom *prevAtom = nullptr; + file.eachAtomInSection(section, + [&](MachODefinedAtom *atom, uint64_t offset)->void { + if (prevAtom) + prevAtom->addReference(Reference::KindNamespace::all, + Reference::KindArch::all, + Reference::kindLayoutAfter, 0, atom, 0); + prevAtom = atom; + }); + } + + return llvm::Error(); +} + +llvm::Error processSection(DefinedAtom::ContentType atomType, + const Section §ion, + bool customSectionName, + const NormalizedFile &normalizedFile, + MachOFile &file, bool scatterable, + bool copyRefs) { + const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + + // Get info on how to atomize section. + unsigned int sizeMultiple; + DefinedAtom::Scope scope; + DefinedAtom::Merge merge; + AtomizeModel atomizeModel; + sectionParseInfo(atomType, sizeMultiple, scope, merge, atomizeModel); + + // Validate section size. 
+ if ((section.content.size() % sizeMultiple) != 0) + return llvm::make_error<GenericError>(Twine("Section ") + + section.segmentName + + "/" + section.sectionName + + " has size (" + + Twine(section.content.size()) + + ") which is not a multiple of " + + Twine(sizeMultiple)); + + if (atomizeModel == atomizeAtSymbols) { + // Break section up into atoms each with a fixed size. + return processSymboledSection(atomType, section, normalizedFile, file, + scatterable, copyRefs); + } else { + unsigned int size; + for (unsigned int offset = 0, e = section.content.size(); offset != e;) { + switch (atomizeModel) { + case atomizeFixedSize: + // Break section up into atoms each with a fixed size. + size = sizeMultiple; + break; + case atomizePointerSize: + // Break section up into atoms each the size of a pointer. + size = is64 ? 8 : 4; + break; + case atomizeUTF8: + // Break section up into zero terminated c-strings. + size = 0; + for (unsigned int i = offset; i < e; ++i) { + if (section.content[i] == 0) { + size = i + 1 - offset; + break; + } + } + break; + case atomizeUTF16: + // Break section up into zero terminated UTF16 strings. + size = 0; + for (unsigned int i = offset; i < e; i += 2) { + if ((section.content[i] == 0) && (section.content[i + 1] == 0)) { + size = i + 2 - offset; + break; + } + } + break; + case atomizeCFI: + // Break section up into dwarf unwind CFIs (FDE or CIE). + size = read32(§ion.content[offset], isBig) + 4; + if (offset+size > section.content.size()) { + return llvm::make_error<GenericError>(Twine("Section ") + + section.segmentName + + "/" + section.sectionName + + " is malformed. Size of CFI " + "starting at offset (" + + Twine(offset) + + ") is past end of section."); + } + break; + case atomizeCU: + // Break section up into compact unwind entries. + size = is64 ? 32 : 20; + break; + case atomizeCFString: + // Break section up into NS/CFString objects. + size = is64 ? 
32 : 16; + break; + case atomizeAtSymbols: + break; + } + if (size == 0) { + return llvm::make_error<GenericError>(Twine("Section ") + + section.segmentName + + "/" + section.sectionName + + " is malformed. The last atom " + "is not zero terminated."); + } + if (customSectionName) { + // Mach-O needs a segment and section name. Concatentate those two + // with a / separator (e.g. "seg/sect") to fit into the lld model + // of just a section name. + std::string segSectName = section.segmentName.str() + + "/" + section.sectionName.str(); + file.addDefinedAtomInCustomSection(StringRef(), scope, atomType, + merge, false, false, offset, + size, segSectName, true, §ion); + } else { + file.addDefinedAtom(StringRef(), scope, atomType, merge, offset, size, + false, false, copyRefs, §ion); + } + offset += size; + } + } + return llvm::Error(); +} + +const Section* findSectionCoveringAddress(const NormalizedFile &normalizedFile, + uint64_t address) { + for (const Section &s : normalizedFile.sections) { + uint64_t sAddr = s.address; + if ((sAddr <= address) && (address < sAddr+s.content.size())) { + return &s; + } + } + return nullptr; +} + +const MachODefinedAtom * +findAtomCoveringAddress(const NormalizedFile &normalizedFile, MachOFile &file, + uint64_t addr, Reference::Addend *addend) { + const Section *sect = nullptr; + sect = findSectionCoveringAddress(normalizedFile, addr); + if (!sect) + return nullptr; + + uint32_t offsetInTarget; + uint64_t offsetInSect = addr - sect->address; + auto atom = + file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget); + *addend = offsetInTarget; + return atom; +} + +// Walks all relocations for a section in a normalized .o file and +// creates corresponding lld::Reference objects. +llvm::Error convertRelocs(const Section §ion, + const NormalizedFile &normalizedFile, + bool scatterable, + MachOFile &file, + ArchHandler &handler) { + // Utility function for ArchHandler to find atom by its address. 
+ auto atomByAddr = [&] (uint32_t sectIndex, uint64_t addr, + const lld::Atom **atom, Reference::Addend *addend) + -> llvm::Error { + if (sectIndex > normalizedFile.sections.size()) + return llvm::make_error<GenericError>(Twine("out of range section " + "index (") + Twine(sectIndex) + ")"); + const Section *sect = nullptr; + if (sectIndex == 0) { + sect = findSectionCoveringAddress(normalizedFile, addr); + if (!sect) + return llvm::make_error<GenericError>(Twine("address (" + Twine(addr) + + ") is not in any section")); + } else { + sect = &normalizedFile.sections[sectIndex-1]; + } + uint32_t offsetInTarget; + uint64_t offsetInSect = addr - sect->address; + *atom = file.findAtomCoveringAddress(*sect, offsetInSect, &offsetInTarget); + *addend = offsetInTarget; + return llvm::Error(); + }; + + // Utility function for ArchHandler to find atom by its symbol index. + auto atomBySymbol = [&] (uint32_t symbolIndex, const lld::Atom **result) + -> llvm::Error { + // Find symbol from index. + const Symbol *sym = nullptr; + uint32_t numLocal = normalizedFile.localSymbols.size(); + uint32_t numGlobal = normalizedFile.globalSymbols.size(); + uint32_t numUndef = normalizedFile.undefinedSymbols.size(); + if (symbolIndex < numLocal) { + sym = &normalizedFile.localSymbols[symbolIndex]; + } else if (symbolIndex < numLocal+numGlobal) { + sym = &normalizedFile.globalSymbols[symbolIndex-numLocal]; + } else if (symbolIndex < numLocal+numGlobal+numUndef) { + sym = &normalizedFile.undefinedSymbols[symbolIndex-numLocal-numGlobal]; + } else { + return llvm::make_error<GenericError>(Twine("symbol index (") + + Twine(symbolIndex) + ") out of range"); + } + // Find atom from symbol. 
+ if ((sym->type & N_TYPE) == N_SECT) { + if (sym->sect > normalizedFile.sections.size()) + return llvm::make_error<GenericError>(Twine("symbol section index (") + + Twine(sym->sect) + ") out of range "); + const Section &symSection = normalizedFile.sections[sym->sect-1]; + uint64_t targetOffsetInSect = sym->value - symSection.address; + MachODefinedAtom *target = file.findAtomCoveringAddress(symSection, + targetOffsetInSect); + if (target) { + *result = target; + return llvm::Error(); + } + return llvm::make_error<GenericError>("no atom found for defined symbol"); + } else if ((sym->type & N_TYPE) == N_UNDF) { + const lld::Atom *target = file.findUndefAtom(sym->name); + if (target) { + *result = target; + return llvm::Error(); + } + return llvm::make_error<GenericError>("no undefined atom found for sym"); + } else { + // Search undefs + return llvm::make_error<GenericError>("no atom found for symbol"); + } + }; + + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + // Use old-school iterator so that paired relocations can be grouped. + for (auto it=section.relocations.begin(), e=section.relocations.end(); + it != e; ++it) { + const Relocation &reloc = *it; + // Find atom this relocation is in. + if (reloc.offset > section.content.size()) + return llvm::make_error<GenericError>( + Twine("r_address (") + Twine(reloc.offset) + + ") is larger than section size (" + + Twine(section.content.size()) + ")"); + uint32_t offsetInAtom; + MachODefinedAtom *inAtom = file.findAtomCoveringAddress(section, + reloc.offset, + &offsetInAtom); + assert(inAtom && "r_address in range, should have found atom"); + uint64_t fixupAddress = section.address + reloc.offset; + + const lld::Atom *target = nullptr; + Reference::Addend addend = 0; + Reference::KindValue kind; + if (handler.isPairedReloc(reloc)) { + // Handle paired relocations together. 
+ const Relocation &reloc2 = *++it; + auto relocErr = handler.getPairReferenceInfo( + reloc, reloc2, inAtom, offsetInAtom, fixupAddress, isBig, scatterable, + atomByAddr, atomBySymbol, &kind, &target, &addend); + if (relocErr) { + return handleErrors(std::move(relocErr), + [&](std::unique_ptr<GenericError> GE) { + return llvm::make_error<GenericError>( + Twine("bad relocation (") + GE->getMessage() + + ") in section " + + section.segmentName + "/" + section.sectionName + + " (r1_address=" + Twine::utohexstr(reloc.offset) + + ", r1_type=" + Twine(reloc.type) + + ", r1_extern=" + Twine(reloc.isExtern) + + ", r1_length=" + Twine((int)reloc.length) + + ", r1_pcrel=" + Twine(reloc.pcRel) + + (!reloc.scattered ? (Twine(", r1_symbolnum=") + + Twine(reloc.symbol)) + : (Twine(", r1_scattered=1, r1_value=") + + Twine(reloc.value))) + + ")" + + ", (r2_address=" + Twine::utohexstr(reloc2.offset) + + ", r2_type=" + Twine(reloc2.type) + + ", r2_extern=" + Twine(reloc2.isExtern) + + ", r2_length=" + Twine((int)reloc2.length) + + ", r2_pcrel=" + Twine(reloc2.pcRel) + + (!reloc2.scattered ? (Twine(", r2_symbolnum=") + + Twine(reloc2.symbol)) + : (Twine(", r2_scattered=1, r2_value=") + + Twine(reloc2.value))) + + ")" ); + }); + } + } + else { + // Use ArchHandler to convert relocation record into information + // needed to instantiate an lld::Reference object. 
+ auto relocErr = handler.getReferenceInfo( + reloc, inAtom, offsetInAtom, fixupAddress, isBig, atomByAddr, + atomBySymbol, &kind, &target, &addend); + if (relocErr) { + return handleErrors(std::move(relocErr), + [&](std::unique_ptr<GenericError> GE) { + return llvm::make_error<GenericError>( + Twine("bad relocation (") + GE->getMessage() + + ") in section " + + section.segmentName + "/" + section.sectionName + + " (r_address=" + Twine::utohexstr(reloc.offset) + + ", r_type=" + Twine(reloc.type) + + ", r_extern=" + Twine(reloc.isExtern) + + ", r_length=" + Twine((int)reloc.length) + + ", r_pcrel=" + Twine(reloc.pcRel) + + (!reloc.scattered ? (Twine(", r_symbolnum=") + Twine(reloc.symbol)) + : (Twine(", r_scattered=1, r_value=") + + Twine(reloc.value))) + + ")" ); + }); + } + } + // Instantiate an lld::Reference object and add to its atom. + inAtom->addReference(Reference::KindNamespace::mach_o, + handler.kindArch(), + kind, offsetInAtom, target, addend); + } + + return llvm::Error(); +} + +bool isDebugInfoSection(const Section §ion) { + if ((section.attributes & S_ATTR_DEBUG) == 0) + return false; + return section.segmentName.equals("__DWARF"); +} + +static int64_t readSPtr(bool is64, bool isBig, const uint8_t *addr) { + if (is64) + return read64(addr, isBig); + + int32_t res = read32(addr, isBig); + return res; +} + +/// --- Augmentation String Processing --- + +struct CIEInfo { + bool _augmentationDataPresent = false; + bool _mayHaveEH = false; + uint32_t _offsetOfLSDA = ~0U; + uint32_t _offsetOfPersonality = ~0U; + uint32_t _offsetOfFDEPointerEncoding = ~0U; + uint32_t _augmentationDataLength = ~0U; +}; + +typedef llvm::DenseMap<const MachODefinedAtom*, CIEInfo> CIEInfoMap; + +static llvm::Error processAugmentationString(const uint8_t *augStr, + CIEInfo &cieInfo, + unsigned &len) { + + if (augStr[0] == '\0') { + len = 1; + return llvm::Error(); + } + + if (augStr[0] != 'z') + return llvm::make_error<GenericError>("expected 'z' at start of " + "augmentation 
string"); + + cieInfo._augmentationDataPresent = true; + uint64_t idx = 1; + + uint32_t offsetInAugmentationData = 0; + while (augStr[idx] != '\0') { + if (augStr[idx] == 'L') { + cieInfo._offsetOfLSDA = offsetInAugmentationData; + // This adds a single byte to the augmentation data. + ++offsetInAugmentationData; + ++idx; + continue; + } + if (augStr[idx] == 'P') { + cieInfo._offsetOfPersonality = offsetInAugmentationData; + // This adds a single byte to the augmentation data for the encoding, + // then a number of bytes for the pointer data. + // FIXME: We are assuming 4 is correct here for the pointer size as we + // always currently use delta32ToGOT. + offsetInAugmentationData += 5; + ++idx; + continue; + } + if (augStr[idx] == 'R') { + cieInfo._offsetOfFDEPointerEncoding = offsetInAugmentationData; + // This adds a single byte to the augmentation data. + ++offsetInAugmentationData; + ++idx; + continue; + } + if (augStr[idx] == 'e') { + if (augStr[idx + 1] != 'h') + return llvm::make_error<GenericError>("expected 'eh' in " + "augmentation string"); + cieInfo._mayHaveEH = true; + idx += 2; + continue; + } + ++idx; + } + + cieInfo._augmentationDataLength = offsetInAugmentationData; + + len = idx + 1; + return llvm::Error(); +} + +static llvm::Error processCIE(const NormalizedFile &normalizedFile, + MachOFile &file, + mach_o::ArchHandler &handler, + const Section *ehFrameSection, + MachODefinedAtom *atom, + uint64_t offset, + CIEInfoMap &cieInfos) { + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + const uint8_t *frameData = atom->rawContent().data(); + + CIEInfo cieInfo; + + uint32_t size = read32(frameData, isBig); + uint64_t cieIDField = size == 0xffffffffU + ? 
sizeof(uint32_t) + sizeof(uint64_t) + : sizeof(uint32_t); + uint64_t versionField = cieIDField + sizeof(uint32_t); + uint64_t augmentationStringField = versionField + sizeof(uint8_t); + + unsigned augmentationStringLength = 0; + if (auto err = processAugmentationString(frameData + augmentationStringField, + cieInfo, augmentationStringLength)) + return err; + + if (cieInfo._offsetOfPersonality != ~0U) { + // If we have augmentation data for the personality function, then we may + // need to implicitly generate its relocation. + + // Parse the EH Data field which is pointer sized. + uint64_t EHDataField = augmentationStringField + augmentationStringLength; + const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); + unsigned EHDataFieldSize = (cieInfo._mayHaveEH ? (is64 ? 8 : 4) : 0); + + // Parse Code Align Factor which is a ULEB128. + uint64_t CodeAlignField = EHDataField + EHDataFieldSize; + unsigned lengthFieldSize = 0; + llvm::decodeULEB128(frameData + CodeAlignField, &lengthFieldSize); + + // Parse Data Align Factor which is a SLEB128. + uint64_t DataAlignField = CodeAlignField + lengthFieldSize; + llvm::decodeSLEB128(frameData + DataAlignField, &lengthFieldSize); + + // Parse Return Address Register which is a byte. + uint64_t ReturnAddressField = DataAlignField + lengthFieldSize; + + // Parse the augmentation length which is a ULEB128. + uint64_t AugmentationLengthField = ReturnAddressField + 1; + uint64_t AugmentationLength = + llvm::decodeULEB128(frameData + AugmentationLengthField, + &lengthFieldSize); + + if (AugmentationLength != cieInfo._augmentationDataLength) + return llvm::make_error<GenericError>("CIE augmentation data length " + "mismatch"); + + // Get the start address of the augmentation data. + uint64_t AugmentationDataField = AugmentationLengthField + lengthFieldSize; + + // Parse the personality function from the augmentation data. 
+ uint64_t PersonalityField = + AugmentationDataField + cieInfo._offsetOfPersonality; + + // Parse the personality encoding. + // FIXME: Verify that this is a 32-bit pcrel offset. + uint64_t PersonalityFunctionField = PersonalityField + 1; + + if (atom->begin() != atom->end()) { + // If we have an explicit relocation, then make sure it matches this + // offset as this is where we'd expect it to be applied to. + DefinedAtom::reference_iterator CurrentRef = atom->begin(); + if (CurrentRef->offsetInAtom() != PersonalityFunctionField) + return llvm::make_error<GenericError>("CIE personality reloc at " + "wrong offset"); + + if (++CurrentRef != atom->end()) + return llvm::make_error<GenericError>("CIE contains too many relocs"); + } else { + // Implicitly generate the personality function reloc. It's assumed to + // be a delta32 offset to a GOT entry. + // FIXME: Parse the encoding and check this. + int32_t funcDelta = read32(frameData + PersonalityFunctionField, isBig); + uint64_t funcAddress = ehFrameSection->address + offset + + PersonalityFunctionField; + funcAddress += funcDelta; + + const MachODefinedAtom *func = nullptr; + Reference::Addend addend; + func = findAtomCoveringAddress(normalizedFile, file, funcAddress, + &addend); + atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(), + handler.unwindRefToPersonalityFunctionKind(), + PersonalityFunctionField, func, addend); + } + } else if (atom->begin() != atom->end()) { + // Otherwise, we expect there to be no relocations in this atom as the only + // relocation would have been to the personality function. 
+ return llvm::make_error<GenericError>("unexpected relocation in CIE"); + } + + + cieInfos[atom] = std::move(cieInfo); + + return llvm::Error(); +} + +static llvm::Error processFDE(const NormalizedFile &normalizedFile, + MachOFile &file, + mach_o::ArchHandler &handler, + const Section *ehFrameSection, + MachODefinedAtom *atom, + uint64_t offset, + const CIEInfoMap &cieInfos) { + + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + const bool is64 = MachOLinkingContext::is64Bit(normalizedFile.arch); + + // Compiler wasn't lazy and actually told us what it meant. + // Unfortunately, the compiler may not have generated references for all of + // [cie, func, lsda] and so we still need to parse the FDE and add references + // for any the compiler didn't generate. + if (atom->begin() != atom->end()) + atom->sortReferences(); + + DefinedAtom::reference_iterator CurrentRef = atom->begin(); + + // This helper returns the reference (if one exists) at the offset we are + // currently processing. It automatically increments the ref iterator if we + // do return a ref, and throws an error if we pass over a ref without + // comsuming it. + auto currentRefGetter = [&CurrentRef, + &atom](uint64_t Offset)->const Reference* { + // If there are no more refs found, then we are done. + if (CurrentRef == atom->end()) + return nullptr; + + const Reference *Ref = *CurrentRef; + + // If we haven't reached the offset for this reference, then return that + // we don't yet have a reference to process. + if (Offset < Ref->offsetInAtom()) + return nullptr; + + // If the offset is equal, then we want to process this ref. + if (Offset == Ref->offsetInAtom()) { + ++CurrentRef; + return Ref; + } + + // The current ref is at an offset which is earlier than the current + // offset, then we failed to consume it when we should have. In this case + // throw an error. 
+ llvm::report_fatal_error("Skipped reference when processing FDE"); + }; + + // Helper to either get the reference at this current location, and verify + // that it is of the expected type, or add a reference of that type. + // Returns the reference target. + auto verifyOrAddReference = [&](uint64_t targetAddress, + Reference::KindValue refKind, + uint64_t refAddress, + bool allowsAddend)->const Atom* { + if (auto *ref = currentRefGetter(refAddress)) { + // The compiler already emitted a relocation for the CIE ref. This should + // have been converted to the correct type of reference in + // get[Pair]ReferenceInfo(). + assert(ref->kindValue() == refKind && + "Incorrect EHFrame reference kind"); + return ref->target(); + } + Reference::Addend addend; + auto *target = findAtomCoveringAddress(normalizedFile, file, + targetAddress, &addend); + atom->addReference(Reference::KindNamespace::mach_o, handler.kindArch(), + refKind, refAddress, target, addend); + + if (!allowsAddend) + assert(!addend && "EHFrame reference cannot have addend"); + return target; + }; + + const uint8_t *startFrameData = atom->rawContent().data(); + const uint8_t *frameData = startFrameData; + + uint32_t size = read32(frameData, isBig); + uint64_t cieFieldInFDE = size == 0xffffffffU + ? sizeof(uint32_t) + sizeof(uint64_t) + : sizeof(uint32_t); + + // Linker needs to fixup a reference from the FDE to its parent CIE (a + // 32-bit byte offset backwards in the __eh_frame section). 
+ uint32_t cieDelta = read32(frameData + cieFieldInFDE, isBig); + uint64_t cieAddress = ehFrameSection->address + offset + cieFieldInFDE; + cieAddress -= cieDelta; + + auto *cieRefTarget = verifyOrAddReference(cieAddress, + handler.unwindRefToCIEKind(), + cieFieldInFDE, false); + const MachODefinedAtom *cie = dyn_cast<MachODefinedAtom>(cieRefTarget); + assert(cie && cie->contentType() == DefinedAtom::typeCFI && + "FDE's CIE field does not point at the start of a CIE."); + + const CIEInfo &cieInfo = cieInfos.find(cie)->second; + + // Linker needs to fixup reference from the FDE to the function it's + // describing. FIXME: there are actually different ways to do this, and the + // particular method used is specified in the CIE's augmentation fields + // (hopefully) + uint64_t rangeFieldInFDE = cieFieldInFDE + sizeof(uint32_t); + + int64_t functionFromFDE = readSPtr(is64, isBig, + frameData + rangeFieldInFDE); + uint64_t rangeStart = ehFrameSection->address + offset + rangeFieldInFDE; + rangeStart += functionFromFDE; + + verifyOrAddReference(rangeStart, + handler.unwindRefToFunctionKind(), + rangeFieldInFDE, true); + + // Handle the augmentation data if there is any. + if (cieInfo._augmentationDataPresent) { + // First process the augmentation data length field. + uint64_t augmentationDataLengthFieldInFDE = + rangeFieldInFDE + 2 * (is64 ? sizeof(uint64_t) : sizeof(uint32_t)); + unsigned lengthFieldSize = 0; + uint64_t augmentationDataLength = + llvm::decodeULEB128(frameData + augmentationDataLengthFieldInFDE, + &lengthFieldSize); + + if (cieInfo._offsetOfLSDA != ~0U && augmentationDataLength > 0) { + + // Look at the augmentation data field. 
+ uint64_t augmentationDataFieldInFDE = + augmentationDataLengthFieldInFDE + lengthFieldSize; + + int64_t lsdaFromFDE = readSPtr(is64, isBig, + frameData + augmentationDataFieldInFDE); + uint64_t lsdaStart = + ehFrameSection->address + offset + augmentationDataFieldInFDE + + lsdaFromFDE; + + verifyOrAddReference(lsdaStart, + handler.unwindRefToFunctionKind(), + augmentationDataFieldInFDE, true); + } + } + + return llvm::Error(); +} + +llvm::Error addEHFrameReferences(const NormalizedFile &normalizedFile, + MachOFile &file, + mach_o::ArchHandler &handler) { + + const Section *ehFrameSection = nullptr; + for (auto §ion : normalizedFile.sections) + if (section.segmentName == "__TEXT" && + section.sectionName == "__eh_frame") { + ehFrameSection = §ion; + break; + } + + // No __eh_frame so nothing to do. + if (!ehFrameSection) + return llvm::Error(); + + llvm::Error ehFrameErr; + CIEInfoMap cieInfos; + + file.eachAtomInSection(*ehFrameSection, + [&](MachODefinedAtom *atom, uint64_t offset) -> void { + assert(atom->contentType() == DefinedAtom::typeCFI); + + // Bail out if we've encountered an error. 
+ if (ehFrameErr) + return; + + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + if (ArchHandler::isDwarfCIE(isBig, atom)) + ehFrameErr = processCIE(normalizedFile, file, handler, ehFrameSection, + atom, offset, cieInfos); + else + ehFrameErr = processFDE(normalizedFile, file, handler, ehFrameSection, + atom, offset, cieInfos); + }); + + return ehFrameErr; +} + +llvm::Error parseObjCImageInfo(const Section §, + const NormalizedFile &normalizedFile, + MachOFile &file) { + + // struct objc_image_info { + // uint32_t version; // initially 0 + // uint32_t flags; + // }; + + ArrayRef<uint8_t> content = sect.content; + if (content.size() != 8) + return llvm::make_error<GenericError>(sect.segmentName + "/" + + sect.sectionName + + " in file " + file.path() + + " should be 8 bytes in size"); + + const bool isBig = MachOLinkingContext::isBigEndian(normalizedFile.arch); + uint32_t version = read32(content.data(), isBig); + if (version) + return llvm::make_error<GenericError>(sect.segmentName + "/" + + sect.sectionName + + " in file " + file.path() + + " should have version=0"); + + uint32_t flags = read32(content.data() + 4, isBig); + if (flags & (MachOLinkingContext::objc_supports_gc | + MachOLinkingContext::objc_gc_only)) + return llvm::make_error<GenericError>(sect.segmentName + "/" + + sect.sectionName + + " in file " + file.path() + + " uses GC. This is not supported"); + + if (flags & MachOLinkingContext::objc_retainReleaseForSimulator) + file.setObjcConstraint(MachOLinkingContext::objc_retainReleaseForSimulator); + else + file.setObjcConstraint(MachOLinkingContext::objc_retainRelease); + + file.setSwiftVersion((flags >> 8) & 0xFF); + + return llvm::Error(); +} + + +/// Converts normalized mach-o file into an lld::File and lld::Atoms. 
+llvm::Expected<std::unique_ptr<lld::File>> +objectToAtoms(const NormalizedFile &normalizedFile, StringRef path, + bool copyRefs) { + std::unique_ptr<MachOFile> file(new MachOFile(path)); + if (auto ec = normalizedObjectToAtoms(file.get(), normalizedFile, copyRefs)) + return std::move(ec); + return std::unique_ptr<File>(std::move(file)); +} + +llvm::Expected<std::unique_ptr<lld::File>> +dylibToAtoms(const NormalizedFile &normalizedFile, StringRef path, + bool copyRefs) { + // Instantiate SharedLibraryFile object. + std::unique_ptr<MachODylibFile> file(new MachODylibFile(path)); + if (auto ec = normalizedDylibToAtoms(file.get(), normalizedFile, copyRefs)) + return std::move(ec); + return std::unique_ptr<File>(std::move(file)); +} + +} // anonymous namespace + +namespace normalized { + +static bool isObjCImageInfo(const Section §) { + return (sect.segmentName == "__OBJC" && sect.sectionName == "__image_info") || + (sect.segmentName == "__DATA" && sect.sectionName == "__objc_imageinfo"); +} + +llvm::Error +normalizedObjectToAtoms(MachOFile *file, + const NormalizedFile &normalizedFile, + bool copyRefs) { + DEBUG(llvm::dbgs() << "******** Normalizing file to atoms: " + << file->path() << "\n"); + bool scatterable = ((normalizedFile.flags & MH_SUBSECTIONS_VIA_SYMBOLS) != 0); + + // Create atoms from each section. + for (auto § : normalizedFile.sections) { + DEBUG(llvm::dbgs() << "Creating atoms: "; sect.dump()); + if (isDebugInfoSection(sect)) + continue; + + + // If the file contains an objc_image_info struct, then we should parse the + // ObjC flags and Swift version. + if (isObjCImageInfo(sect)) { + if (auto ec = parseObjCImageInfo(sect, normalizedFile, *file)) + return ec; + // We then skip adding atoms for this section as we use the ObjCPass to + // re-emit this data after it has been aggregated for all files. 
+ continue; + } + + bool customSectionName; + DefinedAtom::ContentType atomType = atomTypeFromSection(sect, + customSectionName); + if (auto ec = processSection(atomType, sect, customSectionName, + normalizedFile, *file, scatterable, copyRefs)) + return ec; + } + // Create atoms from undefined symbols. + for (auto &sym : normalizedFile.undefinedSymbols) { + // Undefinded symbols with n_value != 0 are actually tentative definitions. + if (sym.value == Hex64(0)) { + file->addUndefinedAtom(sym.name, copyRefs); + } else { + file->addTentativeDefAtom(sym.name, atomScope(sym.scope), sym.value, + DefinedAtom::Alignment(1 << (sym.desc >> 8)), + copyRefs); + } + } + + // Convert mach-o relocations to References + std::unique_ptr<mach_o::ArchHandler> handler + = ArchHandler::create(normalizedFile.arch); + for (auto § : normalizedFile.sections) { + if (isDebugInfoSection(sect)) + continue; + if (llvm::Error ec = convertRelocs(sect, normalizedFile, scatterable, + *file, *handler)) + return ec; + } + + // Add additional arch-specific References + file->eachDefinedAtom([&](MachODefinedAtom* atom) -> void { + handler->addAdditionalReferences(*atom); + }); + + // Each __eh_frame section needs references to both __text (the function we're + // providing unwind info for) and itself (FDE -> CIE). These aren't + // represented in the relocations on some architectures, so we have to add + // them back in manually there. + if (auto ec = addEHFrameReferences(normalizedFile, *file, *handler)) + return ec; + + // Process mach-o data-in-code regions array. That information is encoded in + // atoms as References at each transition point. 
+ unsigned nextIndex = 0; + for (const DataInCode &entry : normalizedFile.dataInCode) { + ++nextIndex; + const Section* s = findSectionCoveringAddress(normalizedFile, entry.offset); + if (!s) { + return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE address (" + + Twine(entry.offset) + + ") is not in any section")); + } + uint64_t offsetInSect = entry.offset - s->address; + uint32_t offsetInAtom; + MachODefinedAtom *atom = file->findAtomCoveringAddress(*s, offsetInSect, + &offsetInAtom); + if (offsetInAtom + entry.length > atom->size()) { + return llvm::make_error<GenericError>(Twine("LC_DATA_IN_CODE entry " + "(offset=" + + Twine(entry.offset) + + ", length=" + + Twine(entry.length) + + ") crosses atom boundary.")); + } + // Add reference that marks start of data-in-code. + atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(), + handler->dataInCodeTransitionStart(*atom), + offsetInAtom, atom, entry.kind); + + // Peek at next entry, if it starts where this one ends, skip ending ref. + if (nextIndex < normalizedFile.dataInCode.size()) { + const DataInCode &nextEntry = normalizedFile.dataInCode[nextIndex]; + if (nextEntry.offset == (entry.offset + entry.length)) + continue; + } + + // If data goes to end of function, skip ending ref. + if ((offsetInAtom + entry.length) == atom->size()) + continue; + + // Add reference that marks end of data-in-code. + atom->addReference(Reference::KindNamespace::mach_o, handler->kindArch(), + handler->dataInCodeTransitionEnd(*atom), + offsetInAtom+entry.length, atom, 0); + } + + // Cache some attributes on the file for use later. + file->setFlags(normalizedFile.flags); + file->setArch(normalizedFile.arch); + file->setOS(normalizedFile.os); + file->setMinVersion(normalizedFile.minOSverson); + file->setMinVersionLoadCommandKind(normalizedFile.minOSVersionKind); + + // Sort references in each atom to their canonical order. 
+ for (const DefinedAtom* defAtom : file->defined()) { + reinterpret_cast<const SimpleDefinedAtom*>(defAtom)->sortReferences(); + } + return llvm::Error(); +} + +llvm::Error +normalizedDylibToAtoms(MachODylibFile *file, + const NormalizedFile &normalizedFile, + bool copyRefs) { + file->setInstallName(normalizedFile.installName); + file->setCompatVersion(normalizedFile.compatVersion); + file->setCurrentVersion(normalizedFile.currentVersion); + + // Tell MachODylibFile object about all symbols it exports. + if (!normalizedFile.exportInfo.empty()) { + // If exports trie exists, use it instead of traditional symbol table. + for (const Export &exp : normalizedFile.exportInfo) { + bool weakDef = (exp.flags & EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION); + // StringRefs from export iterator are ephemeral, so force copy. + file->addExportedSymbol(exp.name, weakDef, true); + } + } else { + for (auto &sym : normalizedFile.globalSymbols) { + assert((sym.scope & N_EXT) && "only expect external symbols here"); + bool weakDef = (sym.desc & N_WEAK_DEF); + file->addExportedSymbol(sym.name, weakDef, copyRefs); + } + } + // Tell MachODylibFile object about all dylibs it re-exports. + for (const DependentDylib &dep : normalizedFile.dependentDylibs) { + if (dep.kind == llvm::MachO::LC_REEXPORT_DYLIB) + file->addReExportedDylib(dep.path); + } + return llvm::Error(); +} + +void relocatableSectionInfoForContentType(DefinedAtom::ContentType atomType, + StringRef &segmentName, + StringRef §ionName, + SectionType §ionType, + SectionAttr §ionAttrs, + bool &relocsToDefinedCanBeImplicit) { + + for (const MachORelocatableSectionToAtomType *p = sectsToAtomType ; + p->atomType != DefinedAtom::typeUnknown; ++p) { + if (p->atomType != atomType) + continue; + // Wild carded entries are ignored for reverse lookups. 
+ if (p->segmentName.empty() || p->sectionName.empty()) + continue; + segmentName = p->segmentName; + sectionName = p->sectionName; + sectionType = p->sectionType; + sectionAttrs = 0; + relocsToDefinedCanBeImplicit = false; + if (atomType == DefinedAtom::typeCode) + sectionAttrs = S_ATTR_PURE_INSTRUCTIONS; + if (atomType == DefinedAtom::typeCFI) + relocsToDefinedCanBeImplicit = true; + return; + } + llvm_unreachable("content type not yet supported"); +} + +llvm::Expected<std::unique_ptr<lld::File>> +normalizedToAtoms(const NormalizedFile &normalizedFile, StringRef path, + bool copyRefs) { + switch (normalizedFile.fileType) { + case MH_DYLIB: + case MH_DYLIB_STUB: + return dylibToAtoms(normalizedFile, path, copyRefs); + case MH_OBJECT: + return objectToAtoms(normalizedFile, path, copyRefs); + default: + llvm_unreachable("unhandled MachO file type!"); + } +} + +#ifndef NDEBUG +void Section::dump(llvm::raw_ostream &OS) const { + OS << "Section (\"" << segmentName << ", " << sectionName << "\""; + OS << ", addr: " << llvm::format_hex(address, 16, true); + OS << ", size: " << llvm::format_hex(content.size(), 8, true) << ")\n"; +} +#endif + +} // namespace normalized +} // namespace mach_o +} // namespace lld diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp new file mode 100644 index 00000000000..66be7717398 --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp @@ -0,0 +1,843 @@ +//===- lib/ReaderWriter/MachO/MachONormalizedFileYAML.cpp -----------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +/// +/// \file For mach-o object files, this implementation uses YAML I/O to +/// provide the convert between YAML and the normalized mach-o (NM). 
+/// +/// +------------+ +------+ +/// | normalized | <-> | yaml | +/// +------------+ +------+ + +#include "MachONormalizedFile.h" +#include "lld/Core/Error.h" +#include "lld/Core/LLVM.h" +#include "lld/ReaderWriter/YamlContext.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MachO.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" +#include <system_error> + + +using llvm::StringRef; +using namespace llvm::yaml; +using namespace llvm::MachO; +using namespace lld::mach_o::normalized; +using lld::YamlContext; + +LLVM_YAML_IS_SEQUENCE_VECTOR(Segment) +LLVM_YAML_IS_SEQUENCE_VECTOR(DependentDylib) +LLVM_YAML_IS_SEQUENCE_VECTOR(RebaseLocation) +LLVM_YAML_IS_SEQUENCE_VECTOR(BindLocation) +LLVM_YAML_IS_SEQUENCE_VECTOR(Export) +LLVM_YAML_IS_SEQUENCE_VECTOR(StringRef) +LLVM_YAML_IS_SEQUENCE_VECTOR(DataInCode) + + +// for compatibility with gcc-4.7 in C++11 mode, add extra namespace +namespace llvm { +namespace yaml { + +// A vector of Sections is a sequence. +template<> +struct SequenceTraits< std::vector<Section> > { + static size_t size(IO &io, std::vector<Section> &seq) { + return seq.size(); + } + static Section& element(IO &io, std::vector<Section> &seq, size_t index) { + if ( index >= seq.size() ) + seq.resize(index+1); + return seq[index]; + } +}; + +template<> +struct SequenceTraits< std::vector<Symbol> > { + static size_t size(IO &io, std::vector<Symbol> &seq) { + return seq.size(); + } + static Symbol& element(IO &io, std::vector<Symbol> &seq, size_t index) { + if ( index >= seq.size() ) + seq.resize(index+1); + return seq[index]; + } +}; + +// A vector of Relocations is a sequence. 
+template<> +struct SequenceTraits< Relocations > { + static size_t size(IO &io, Relocations &seq) { + return seq.size(); + } + static Relocation& element(IO &io, Relocations &seq, size_t index) { + if ( index >= seq.size() ) + seq.resize(index+1); + return seq[index]; + } +}; + +// The content for a section is represented as a flow sequence of hex bytes. +template<> +struct SequenceTraits< ContentBytes > { + static size_t size(IO &io, ContentBytes &seq) { + return seq.size(); + } + static Hex8& element(IO &io, ContentBytes &seq, size_t index) { + if ( index >= seq.size() ) + seq.resize(index+1); + return seq[index]; + } + static const bool flow = true; +}; + +// The indirect symbols for a section is represented as a flow sequence +// of numbers (symbol table indexes). +template<> +struct SequenceTraits< IndirectSymbols > { + static size_t size(IO &io, IndirectSymbols &seq) { + return seq.size(); + } + static uint32_t& element(IO &io, IndirectSymbols &seq, size_t index) { + if ( index >= seq.size() ) + seq.resize(index+1); + return seq[index]; + } + static const bool flow = true; +}; + +template <> +struct ScalarEnumerationTraits<lld::MachOLinkingContext::Arch> { + static void enumeration(IO &io, lld::MachOLinkingContext::Arch &value) { + io.enumCase(value, "unknown",lld::MachOLinkingContext::arch_unknown); + io.enumCase(value, "ppc", lld::MachOLinkingContext::arch_ppc); + io.enumCase(value, "x86", lld::MachOLinkingContext::arch_x86); + io.enumCase(value, "x86_64", lld::MachOLinkingContext::arch_x86_64); + io.enumCase(value, "armv6", lld::MachOLinkingContext::arch_armv6); + io.enumCase(value, "armv7", lld::MachOLinkingContext::arch_armv7); + io.enumCase(value, "armv7s", lld::MachOLinkingContext::arch_armv7s); + io.enumCase(value, "arm64", lld::MachOLinkingContext::arch_arm64); + } +}; + +template <> +struct ScalarEnumerationTraits<lld::MachOLinkingContext::OS> { + static void enumeration(IO &io, lld::MachOLinkingContext::OS &value) { + io.enumCase(value, "unknown", 
+ lld::MachOLinkingContext::OS::unknown); + io.enumCase(value, "Mac OS X", + lld::MachOLinkingContext::OS::macOSX); + io.enumCase(value, "iOS", + lld::MachOLinkingContext::OS::iOS); + io.enumCase(value, "iOS Simulator", + lld::MachOLinkingContext::OS::iOS_simulator); + } +}; + + +template <> +struct ScalarEnumerationTraits<HeaderFileType> { + static void enumeration(IO &io, HeaderFileType &value) { + io.enumCase(value, "MH_OBJECT", llvm::MachO::MH_OBJECT); + io.enumCase(value, "MH_DYLIB", llvm::MachO::MH_DYLIB); + io.enumCase(value, "MH_EXECUTE", llvm::MachO::MH_EXECUTE); + io.enumCase(value, "MH_BUNDLE", llvm::MachO::MH_BUNDLE); + } +}; + + +template <> +struct ScalarBitSetTraits<FileFlags> { + static void bitset(IO &io, FileFlags &value) { + io.bitSetCase(value, "MH_TWOLEVEL", + llvm::MachO::MH_TWOLEVEL); + io.bitSetCase(value, "MH_SUBSECTIONS_VIA_SYMBOLS", + llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + } +}; + + +template <> +struct ScalarEnumerationTraits<SectionType> { + static void enumeration(IO &io, SectionType &value) { + io.enumCase(value, "S_REGULAR", + llvm::MachO::S_REGULAR); + io.enumCase(value, "S_ZEROFILL", + llvm::MachO::S_ZEROFILL); + io.enumCase(value, "S_CSTRING_LITERALS", + llvm::MachO::S_CSTRING_LITERALS); + io.enumCase(value, "S_4BYTE_LITERALS", + llvm::MachO::S_4BYTE_LITERALS); + io.enumCase(value, "S_8BYTE_LITERALS", + llvm::MachO::S_8BYTE_LITERALS); + io.enumCase(value, "S_LITERAL_POINTERS", + llvm::MachO::S_LITERAL_POINTERS); + io.enumCase(value, "S_NON_LAZY_SYMBOL_POINTERS", + llvm::MachO::S_NON_LAZY_SYMBOL_POINTERS); + io.enumCase(value, "S_LAZY_SYMBOL_POINTERS", + llvm::MachO::S_LAZY_SYMBOL_POINTERS); + io.enumCase(value, "S_SYMBOL_STUBS", + llvm::MachO::S_SYMBOL_STUBS); + io.enumCase(value, "S_MOD_INIT_FUNC_POINTERS", + llvm::MachO::S_MOD_INIT_FUNC_POINTERS); + io.enumCase(value, "S_MOD_TERM_FUNC_POINTERS", + llvm::MachO::S_MOD_TERM_FUNC_POINTERS); + io.enumCase(value, "S_COALESCED", + llvm::MachO::S_COALESCED); + io.enumCase(value, 
"S_GB_ZEROFILL", + llvm::MachO::S_GB_ZEROFILL); + io.enumCase(value, "S_INTERPOSING", + llvm::MachO::S_INTERPOSING); + io.enumCase(value, "S_16BYTE_LITERALS", + llvm::MachO::S_16BYTE_LITERALS); + io.enumCase(value, "S_DTRACE_DOF", + llvm::MachO::S_DTRACE_DOF); + io.enumCase(value, "S_LAZY_DYLIB_SYMBOL_POINTERS", + llvm::MachO::S_LAZY_DYLIB_SYMBOL_POINTERS); + io.enumCase(value, "S_THREAD_LOCAL_REGULAR", + llvm::MachO::S_THREAD_LOCAL_REGULAR); + io.enumCase(value, "S_THREAD_LOCAL_ZEROFILL", + llvm::MachO::S_THREAD_LOCAL_ZEROFILL); + io.enumCase(value, "S_THREAD_LOCAL_VARIABLES", + llvm::MachO::S_THREAD_LOCAL_VARIABLES); + io.enumCase(value, "S_THREAD_LOCAL_VARIABLE_POINTERS", + llvm::MachO::S_THREAD_LOCAL_VARIABLE_POINTERS); + io.enumCase(value, "S_THREAD_LOCAL_INIT_FUNCTION_POINTERS", + llvm::MachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS); + } +}; + +template <> +struct ScalarBitSetTraits<SectionAttr> { + static void bitset(IO &io, SectionAttr &value) { + io.bitSetCase(value, "S_ATTR_PURE_INSTRUCTIONS", + llvm::MachO::S_ATTR_PURE_INSTRUCTIONS); + io.bitSetCase(value, "S_ATTR_SOME_INSTRUCTIONS", + llvm::MachO::S_ATTR_SOME_INSTRUCTIONS); + io.bitSetCase(value, "S_ATTR_NO_DEAD_STRIP", + llvm::MachO::S_ATTR_NO_DEAD_STRIP); + io.bitSetCase(value, "S_ATTR_EXT_RELOC", + llvm::MachO::S_ATTR_EXT_RELOC); + io.bitSetCase(value, "S_ATTR_LOC_RELOC", + llvm::MachO::S_ATTR_LOC_RELOC); + } +}; + +/// This is a custom formatter for SectionAlignment. Values are +/// the power to raise by, ie, the n in 2^n. 
+template <> struct ScalarTraits<SectionAlignment> { + static void output(const SectionAlignment &value, void *ctxt, + raw_ostream &out) { + out << llvm::format("%d", (uint32_t)value); + } + + static StringRef input(StringRef scalar, void *ctxt, + SectionAlignment &value) { + uint32_t alignment; + if (scalar.getAsInteger(0, alignment)) { + return "malformed alignment value"; + } + if (!llvm::isPowerOf2_32(alignment)) + return "alignment must be a power of 2"; + value = alignment; + return StringRef(); // returning empty string means success + } + + static bool mustQuote(StringRef) { return false; } +}; + +template <> +struct ScalarEnumerationTraits<NListType> { + static void enumeration(IO &io, NListType &value) { + io.enumCase(value, "N_UNDF", llvm::MachO::N_UNDF); + io.enumCase(value, "N_ABS", llvm::MachO::N_ABS); + io.enumCase(value, "N_SECT", llvm::MachO::N_SECT); + io.enumCase(value, "N_PBUD", llvm::MachO::N_PBUD); + io.enumCase(value, "N_INDR", llvm::MachO::N_INDR); + } +}; + +template <> +struct ScalarBitSetTraits<SymbolScope> { + static void bitset(IO &io, SymbolScope &value) { + io.bitSetCase(value, "N_EXT", llvm::MachO::N_EXT); + io.bitSetCase(value, "N_PEXT", llvm::MachO::N_PEXT); + } +}; + +template <> +struct ScalarBitSetTraits<SymbolDesc> { + static void bitset(IO &io, SymbolDesc &value) { + io.bitSetCase(value, "N_NO_DEAD_STRIP", llvm::MachO::N_NO_DEAD_STRIP); + io.bitSetCase(value, "N_WEAK_REF", llvm::MachO::N_WEAK_REF); + io.bitSetCase(value, "N_WEAK_DEF", llvm::MachO::N_WEAK_DEF); + io.bitSetCase(value, "N_ARM_THUMB_DEF", llvm::MachO::N_ARM_THUMB_DEF); + io.bitSetCase(value, "N_SYMBOL_RESOLVER", llvm::MachO::N_SYMBOL_RESOLVER); + } +}; + + +template <> +struct MappingTraits<Section> { + struct NormalizedContentBytes; + static void mapping(IO &io, Section §) { + io.mapRequired("segment", sect.segmentName); + io.mapRequired("section", sect.sectionName); + io.mapRequired("type", sect.type); + io.mapOptional("attributes", sect.attributes); + 
io.mapOptional("alignment", sect.alignment, (SectionAlignment)1); + io.mapRequired("address", sect.address); + if (isZeroFillSection(sect.type)) { + // S_ZEROFILL sections use "size:" instead of "content:" + uint64_t size = sect.content.size(); + io.mapOptional("size", size); + if (!io.outputting()) { + uint8_t *bytes = nullptr; + sect.content = makeArrayRef(bytes, size); + } + } else { + MappingNormalization<NormalizedContent, ArrayRef<uint8_t>> content( + io, sect.content); + io.mapOptional("content", content->_normalizedContent); + } + io.mapOptional("relocations", sect.relocations); + io.mapOptional("indirect-syms", sect.indirectSymbols); + } + + struct NormalizedContent { + NormalizedContent(IO &io) : _io(io) {} + NormalizedContent(IO &io, ArrayRef<uint8_t> content) : _io(io) { + // When writing yaml, copy content byte array to Hex8 vector. + for (auto &c : content) { + _normalizedContent.push_back(c); + } + } + ArrayRef<uint8_t> denormalize(IO &io) { + // When reading yaml, allocate byte array owned by NormalizedFile and + // copy Hex8 vector to byte array. 
+ YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + NormalizedFile *file = info->_normalizeMachOFile; + assert(file != nullptr); + size_t size = _normalizedContent.size(); + if (!size) + return None; + uint8_t *bytes = file->ownedAllocations.Allocate<uint8_t>(size); + std::copy(_normalizedContent.begin(), _normalizedContent.end(), bytes); + return makeArrayRef(bytes, size); + } + + IO &_io; + ContentBytes _normalizedContent; + }; +}; + + +template <> +struct MappingTraits<Relocation> { + static void mapping(IO &io, Relocation &reloc) { + io.mapRequired("offset", reloc.offset); + io.mapOptional("scattered", reloc.scattered, false); + io.mapRequired("type", reloc.type); + io.mapRequired("length", reloc.length); + io.mapRequired("pc-rel", reloc.pcRel); + if ( !reloc.scattered ) + io.mapRequired("extern", reloc.isExtern); + if ( reloc.scattered ) + io.mapRequired("value", reloc.value); + if ( !reloc.scattered ) + io.mapRequired("symbol", reloc.symbol); + } +}; + + +template <> +struct ScalarEnumerationTraits<RelocationInfoType> { + static void enumeration(IO &io, RelocationInfoType &value) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + NormalizedFile *file = info->_normalizeMachOFile; + assert(file != nullptr); + switch (file->arch) { + case lld::MachOLinkingContext::arch_x86_64: + io.enumCase(value, "X86_64_RELOC_UNSIGNED", + llvm::MachO::X86_64_RELOC_UNSIGNED); + io.enumCase(value, "X86_64_RELOC_SIGNED", + llvm::MachO::X86_64_RELOC_SIGNED); + io.enumCase(value, "X86_64_RELOC_BRANCH", + llvm::MachO::X86_64_RELOC_BRANCH); + io.enumCase(value, "X86_64_RELOC_GOT_LOAD", + llvm::MachO::X86_64_RELOC_GOT_LOAD); + io.enumCase(value, "X86_64_RELOC_GOT", + llvm::MachO::X86_64_RELOC_GOT); + io.enumCase(value, "X86_64_RELOC_SUBTRACTOR", + llvm::MachO::X86_64_RELOC_SUBTRACTOR); + io.enumCase(value, "X86_64_RELOC_SIGNED_1", + llvm::MachO::X86_64_RELOC_SIGNED_1); + 
io.enumCase(value, "X86_64_RELOC_SIGNED_2", + llvm::MachO::X86_64_RELOC_SIGNED_2); + io.enumCase(value, "X86_64_RELOC_SIGNED_4", + llvm::MachO::X86_64_RELOC_SIGNED_4); + io.enumCase(value, "X86_64_RELOC_TLV", + llvm::MachO::X86_64_RELOC_TLV); + break; + case lld::MachOLinkingContext::arch_x86: + io.enumCase(value, "GENERIC_RELOC_VANILLA", + llvm::MachO::GENERIC_RELOC_VANILLA); + io.enumCase(value, "GENERIC_RELOC_PAIR", + llvm::MachO::GENERIC_RELOC_PAIR); + io.enumCase(value, "GENERIC_RELOC_SECTDIFF", + llvm::MachO::GENERIC_RELOC_SECTDIFF); + io.enumCase(value, "GENERIC_RELOC_LOCAL_SECTDIFF", + llvm::MachO::GENERIC_RELOC_LOCAL_SECTDIFF); + io.enumCase(value, "GENERIC_RELOC_TLV", + llvm::MachO::GENERIC_RELOC_TLV); + break; + case lld::MachOLinkingContext::arch_armv6: + case lld::MachOLinkingContext::arch_armv7: + case lld::MachOLinkingContext::arch_armv7s: + io.enumCase(value, "ARM_RELOC_VANILLA", + llvm::MachO::ARM_RELOC_VANILLA); + io.enumCase(value, "ARM_RELOC_PAIR", + llvm::MachO::ARM_RELOC_PAIR); + io.enumCase(value, "ARM_RELOC_SECTDIFF", + llvm::MachO::ARM_RELOC_SECTDIFF); + io.enumCase(value, "ARM_RELOC_LOCAL_SECTDIFF", + llvm::MachO::ARM_RELOC_LOCAL_SECTDIFF); + io.enumCase(value, "ARM_RELOC_BR24", + llvm::MachO::ARM_RELOC_BR24); + io.enumCase(value, "ARM_THUMB_RELOC_BR22", + llvm::MachO::ARM_THUMB_RELOC_BR22); + io.enumCase(value, "ARM_RELOC_HALF", + llvm::MachO::ARM_RELOC_HALF); + io.enumCase(value, "ARM_RELOC_HALF_SECTDIFF", + llvm::MachO::ARM_RELOC_HALF_SECTDIFF); + break; + case lld::MachOLinkingContext::arch_arm64: + io.enumCase(value, "ARM64_RELOC_UNSIGNED", + llvm::MachO::ARM64_RELOC_UNSIGNED); + io.enumCase(value, "ARM64_RELOC_SUBTRACTOR", + llvm::MachO::ARM64_RELOC_SUBTRACTOR); + io.enumCase(value, "ARM64_RELOC_BRANCH26", + llvm::MachO::ARM64_RELOC_BRANCH26); + io.enumCase(value, "ARM64_RELOC_PAGE21", + llvm::MachO::ARM64_RELOC_PAGE21); + io.enumCase(value, "ARM64_RELOC_PAGEOFF12", + llvm::MachO::ARM64_RELOC_PAGEOFF12); + io.enumCase(value, 
"ARM64_RELOC_GOT_LOAD_PAGE21", + llvm::MachO::ARM64_RELOC_GOT_LOAD_PAGE21); + io.enumCase(value, "ARM64_RELOC_GOT_LOAD_PAGEOFF12", + llvm::MachO::ARM64_RELOC_GOT_LOAD_PAGEOFF12); + io.enumCase(value, "ARM64_RELOC_POINTER_TO_GOT", + llvm::MachO::ARM64_RELOC_POINTER_TO_GOT); + io.enumCase(value, "ARM64_RELOC_TLVP_LOAD_PAGE21", + llvm::MachO::ARM64_RELOC_TLVP_LOAD_PAGE21); + io.enumCase(value, "ARM64_RELOC_TLVP_LOAD_PAGEOFF12", + llvm::MachO::ARM64_RELOC_TLVP_LOAD_PAGEOFF12); + io.enumCase(value, "ARM64_RELOC_ADDEND", + llvm::MachO::ARM64_RELOC_ADDEND); + break; + default: + llvm_unreachable("unknown architecture"); + } + } +}; + + +template <> +struct MappingTraits<Symbol> { + static void mapping(IO &io, Symbol& sym) { + io.mapRequired("name", sym.name); + io.mapRequired("type", sym.type); + io.mapOptional("scope", sym.scope, SymbolScope(0)); + io.mapOptional("sect", sym.sect, (uint8_t)0); + if (sym.type == llvm::MachO::N_UNDF) { + // In undef symbols, desc field contains alignment/ordinal info + // which is better represented as a hex vaule. + uint16_t t1 = sym.desc; + Hex16 t2 = t1; + io.mapOptional("desc", t2, Hex16(0)); + sym.desc = t2; + } else { + // In defined symbols, desc fit is a set of option bits. + io.mapOptional("desc", sym.desc, SymbolDesc(0)); + } + io.mapRequired("value", sym.value); + } +}; + +// Custom mapping for VMProtect (e.g. "r-x"). +template <> +struct ScalarTraits<VMProtect> { + static void output(const VMProtect &value, void*, raw_ostream &out) { + out << ( (value & llvm::MachO::VM_PROT_READ) ? 'r' : '-'); + out << ( (value & llvm::MachO::VM_PROT_WRITE) ? 'w' : '-'); + out << ( (value & llvm::MachO::VM_PROT_EXECUTE) ? 'x' : '-'); + } + static StringRef input(StringRef scalar, void*, VMProtect &value) { + value = 0; + if (scalar.size() != 3) + return "segment access protection must be three chars (e.g. 
\"r-x\")"; + switch (scalar[0]) { + case 'r': + value = llvm::MachO::VM_PROT_READ; + break; + case '-': + break; + default: + return "segment access protection first char must be 'r' or '-'"; + } + switch (scalar[1]) { + case 'w': + value = value | llvm::MachO::VM_PROT_WRITE; + break; + case '-': + break; + default: + return "segment access protection second char must be 'w' or '-'"; + } + switch (scalar[2]) { + case 'x': + value = value | llvm::MachO::VM_PROT_EXECUTE; + break; + case '-': + break; + default: + return "segment access protection third char must be 'x' or '-'"; + } + // Return the empty string on success, + return StringRef(); + } + static bool mustQuote(StringRef) { return false; } +}; + + +template <> +struct MappingTraits<Segment> { + static void mapping(IO &io, Segment& seg) { + io.mapRequired("name", seg.name); + io.mapRequired("address", seg.address); + io.mapRequired("size", seg.size); + io.mapRequired("init-access", seg.init_access); + io.mapRequired("max-access", seg.max_access); + } +}; + +template <> +struct ScalarEnumerationTraits<LoadCommandType> { + static void enumeration(IO &io, LoadCommandType &value) { + io.enumCase(value, "LC_LOAD_DYLIB", + llvm::MachO::LC_LOAD_DYLIB); + io.enumCase(value, "LC_LOAD_WEAK_DYLIB", + llvm::MachO::LC_LOAD_WEAK_DYLIB); + io.enumCase(value, "LC_REEXPORT_DYLIB", + llvm::MachO::LC_REEXPORT_DYLIB); + io.enumCase(value, "LC_LOAD_UPWARD_DYLIB", + llvm::MachO::LC_LOAD_UPWARD_DYLIB); + io.enumCase(value, "LC_LAZY_LOAD_DYLIB", + llvm::MachO::LC_LAZY_LOAD_DYLIB); + io.enumCase(value, "LC_VERSION_MIN_MACOSX", + llvm::MachO::LC_VERSION_MIN_MACOSX); + io.enumCase(value, "LC_VERSION_MIN_IPHONEOS", + llvm::MachO::LC_VERSION_MIN_IPHONEOS); + io.enumCase(value, "LC_VERSION_MIN_TVOS", + llvm::MachO::LC_VERSION_MIN_TVOS); + io.enumCase(value, "LC_VERSION_MIN_WATCHOS", + llvm::MachO::LC_VERSION_MIN_WATCHOS); + } +}; + +template <> +struct MappingTraits<DependentDylib> { + static void mapping(IO &io, DependentDylib& dylib) { 
+ io.mapRequired("path", dylib.path); + io.mapOptional("kind", dylib.kind, + llvm::MachO::LC_LOAD_DYLIB); + io.mapOptional("compat-version", dylib.compatVersion, + PackedVersion(0x10000)); + io.mapOptional("current-version", dylib.currentVersion, + PackedVersion(0x10000)); + } +}; + +template <> +struct ScalarEnumerationTraits<RebaseType> { + static void enumeration(IO &io, RebaseType &value) { + io.enumCase(value, "REBASE_TYPE_POINTER", + llvm::MachO::REBASE_TYPE_POINTER); + io.enumCase(value, "REBASE_TYPE_TEXT_PCREL32", + llvm::MachO::REBASE_TYPE_TEXT_PCREL32); + io.enumCase(value, "REBASE_TYPE_TEXT_ABSOLUTE32", + llvm::MachO::REBASE_TYPE_TEXT_ABSOLUTE32); + } +}; + + +template <> +struct MappingTraits<RebaseLocation> { + static void mapping(IO &io, RebaseLocation& rebase) { + io.mapRequired("segment-index", rebase.segIndex); + io.mapRequired("segment-offset", rebase.segOffset); + io.mapOptional("kind", rebase.kind, + llvm::MachO::REBASE_TYPE_POINTER); + } +}; + + + +template <> +struct ScalarEnumerationTraits<BindType> { + static void enumeration(IO &io, BindType &value) { + io.enumCase(value, "BIND_TYPE_POINTER", + llvm::MachO::BIND_TYPE_POINTER); + io.enumCase(value, "BIND_TYPE_TEXT_ABSOLUTE32", + llvm::MachO::BIND_TYPE_TEXT_ABSOLUTE32); + io.enumCase(value, "BIND_TYPE_TEXT_PCREL32", + llvm::MachO::BIND_TYPE_TEXT_PCREL32); + } +}; + +template <> +struct MappingTraits<BindLocation> { + static void mapping(IO &io, BindLocation &bind) { + io.mapRequired("segment-index", bind.segIndex); + io.mapRequired("segment-offset", bind.segOffset); + io.mapOptional("kind", bind.kind, + llvm::MachO::BIND_TYPE_POINTER); + io.mapOptional("can-be-null", bind.canBeNull, false); + io.mapRequired("ordinal", bind.ordinal); + io.mapRequired("symbol-name", bind.symbolName); + io.mapOptional("addend", bind.addend, Hex64(0)); + } +}; + + +template <> +struct ScalarEnumerationTraits<ExportSymbolKind> { + static void enumeration(IO &io, ExportSymbolKind &value) { + io.enumCase(value, 
"EXPORT_SYMBOL_FLAGS_KIND_REGULAR", + llvm::MachO::EXPORT_SYMBOL_FLAGS_KIND_REGULAR); + io.enumCase(value, "EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL", + llvm::MachO::EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL); + io.enumCase(value, "EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE", + llvm::MachO::EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE); + } +}; + +template <> +struct ScalarBitSetTraits<ExportFlags> { + static void bitset(IO &io, ExportFlags &value) { + io.bitSetCase(value, "EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION", + llvm::MachO::EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION); + io.bitSetCase(value, "EXPORT_SYMBOL_FLAGS_REEXPORT", + llvm::MachO::EXPORT_SYMBOL_FLAGS_REEXPORT); + io.bitSetCase(value, "EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER", + llvm::MachO::EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER); + } +}; + + +template <> +struct MappingTraits<Export> { + static void mapping(IO &io, Export &exp) { + io.mapRequired("name", exp.name); + io.mapOptional("offset", exp.offset); + io.mapOptional("kind", exp.kind, + llvm::MachO::EXPORT_SYMBOL_FLAGS_KIND_REGULAR); + if (!io.outputting() || exp.flags) + io.mapOptional("flags", exp.flags); + io.mapOptional("other", exp.otherOffset, Hex32(0)); + io.mapOptional("other-name", exp.otherName, StringRef()); + } +}; + +template <> +struct ScalarEnumerationTraits<DataRegionType> { + static void enumeration(IO &io, DataRegionType &value) { + io.enumCase(value, "DICE_KIND_DATA", + llvm::MachO::DICE_KIND_DATA); + io.enumCase(value, "DICE_KIND_JUMP_TABLE8", + llvm::MachO::DICE_KIND_JUMP_TABLE8); + io.enumCase(value, "DICE_KIND_JUMP_TABLE16", + llvm::MachO::DICE_KIND_JUMP_TABLE16); + io.enumCase(value, "DICE_KIND_JUMP_TABLE32", + llvm::MachO::DICE_KIND_JUMP_TABLE32); + io.enumCase(value, "DICE_KIND_ABS_JUMP_TABLE32", + llvm::MachO::DICE_KIND_ABS_JUMP_TABLE32); + } +}; + +template <> +struct MappingTraits<DataInCode> { + static void mapping(IO &io, DataInCode &entry) { + io.mapRequired("offset", entry.offset); + io.mapRequired("length", entry.length); + io.mapRequired("kind", 
entry.kind); + } +}; + +template <> +struct ScalarTraits<PackedVersion> { + static void output(const PackedVersion &value, void*, raw_ostream &out) { + out << llvm::format("%d.%d", (value >> 16), (value >> 8) & 0xFF); + if (value & 0xFF) { + out << llvm::format(".%d", (value & 0xFF)); + } + } + static StringRef input(StringRef scalar, void*, PackedVersion &result) { + uint32_t value; + if (lld::MachOLinkingContext::parsePackedVersion(scalar, value)) + return "malformed version number"; + result = value; + // Return the empty string on success, + return StringRef(); + } + static bool mustQuote(StringRef) { return false; } +}; + +template <> +struct MappingTraits<NormalizedFile> { + static void mapping(IO &io, NormalizedFile &file) { + io.mapRequired("arch", file.arch); + io.mapRequired("file-type", file.fileType); + io.mapOptional("flags", file.flags); + io.mapOptional("dependents", file.dependentDylibs); + io.mapOptional("install-name", file.installName, StringRef()); + io.mapOptional("compat-version", file.compatVersion, PackedVersion(0x10000)); + io.mapOptional("current-version", file.currentVersion, PackedVersion(0x10000)); + io.mapOptional("has-UUID", file.hasUUID, true); + io.mapOptional("rpaths", file.rpaths); + io.mapOptional("entry-point", file.entryAddress, Hex64(0)); + io.mapOptional("stack-size", file.stackSize, Hex64(0)); + io.mapOptional("source-version", file.sourceVersion, Hex64(0)); + io.mapOptional("OS", file.os); + io.mapOptional("min-os-version", file.minOSverson, PackedVersion(0)); + io.mapOptional("min-os-version-kind", file.minOSVersionKind, (LoadCommandType)0); + io.mapOptional("sdk-version", file.sdkVersion, PackedVersion(0)); + io.mapOptional("segments", file.segments); + io.mapOptional("sections", file.sections); + io.mapOptional("local-symbols", file.localSymbols); + io.mapOptional("global-symbols", file.globalSymbols); + io.mapOptional("undefined-symbols",file.undefinedSymbols); + io.mapOptional("page-size", file.pageSize, Hex32(4096)); 
+ io.mapOptional("rebasings", file.rebasingInfo); + io.mapOptional("bindings", file.bindingInfo); + io.mapOptional("weak-bindings", file.weakBindingInfo); + io.mapOptional("lazy-bindings", file.lazyBindingInfo); + io.mapOptional("exports", file.exportInfo); + io.mapOptional("dataInCode", file.dataInCode); + } + static StringRef validate(IO &io, NormalizedFile &file) { + return StringRef(); + } +}; + +} // namespace llvm +} // namespace yaml + + +namespace lld { +namespace mach_o { + +/// Handles !mach-o tagged yaml documents. +bool MachOYamlIOTaggedDocumentHandler::handledDocTag(llvm::yaml::IO &io, + const lld::File *&file) const { + if (!io.mapTag("!mach-o")) + return false; + // Step 1: parse yaml into normalized mach-o struct. + NormalizedFile nf; + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + assert(info->_normalizeMachOFile == nullptr); + info->_normalizeMachOFile = &nf; + MappingTraits<NormalizedFile>::mapping(io, nf); + // Step 2: parse normalized mach-o struct into atoms. + auto fileOrError = normalizedToAtoms(nf, info->_path, true); + + // Check that we parsed successfully. + if (!fileOrError) { + std::string buffer; + llvm::raw_string_ostream stream(buffer); + handleAllErrors(fileOrError.takeError(), + [&](const llvm::ErrorInfoBase &EI) { + EI.log(stream); + stream << "\n"; + }); + io.setError(stream.str()); + return false; + } + + if (nf.arch != _arch) { + io.setError(Twine("file is wrong architecture. Expected (" + + MachOLinkingContext::nameFromArch(_arch) + + ") found (" + + MachOLinkingContext::nameFromArch(nf.arch) + + ")")); + return false; + } + info->_normalizeMachOFile = nullptr; + file = fileOrError->release(); + return true; +} + + + +namespace normalized { + +/// Parses a yaml encoded mach-o file to produce an in-memory normalized view. +llvm::Expected<std::unique_ptr<NormalizedFile>> +readYaml(std::unique_ptr<MemoryBuffer> &mb) { + // Make empty NormalizedFile. 
+ std::unique_ptr<NormalizedFile> f(new NormalizedFile()); + + // Create YAML Input parser. + YamlContext yamlContext; + yamlContext._normalizeMachOFile = f.get(); + llvm::yaml::Input yin(mb->getBuffer(), &yamlContext); + + // Fill NormalizedFile by parsing yaml. + yin >> *f; + + // Return error if there were parsing problems. + if (auto ec = yin.error()) + return llvm::make_error<GenericError>(Twine("YAML parsing error: ") + + ec.message()); + + // Hand ownership of instantiated NormalizedFile to caller. + return std::move(f); +} + + +/// Writes a yaml encoded mach-o files from an in-memory normalized view. +std::error_code writeYaml(const NormalizedFile &file, raw_ostream &out) { + // YAML I/O is not const aware, so need to cast away ;-( + NormalizedFile *f = const_cast<NormalizedFile*>(&file); + + // Create yaml Output writer, using yaml options for context. + YamlContext yamlContext; + yamlContext._normalizeMachOFile = f; + llvm::yaml::Output yout(out, &yamlContext); + + // Stream out yaml. + yout << *f; + + return std::error_code(); +} + +} // namespace normalized +} // namespace mach_o +} // namespace lld diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachOPasses.h b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachOPasses.h new file mode 100644 index 00000000000..cd01d4aa2c9 --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/MachOPasses.h @@ -0,0 +1,30 @@ +//===- lib/ReaderWriter/MachO/MachOPasses.h -------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_PASSES_H +#define LLD_READER_WRITER_MACHO_PASSES_H + +#include "lld/Core/PassManager.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" + +namespace lld { +namespace mach_o { + +void addLayoutPass(PassManager &pm, const MachOLinkingContext &ctx); +void addStubsPass(PassManager &pm, const MachOLinkingContext &ctx); +void addGOTPass(PassManager &pm, const MachOLinkingContext &ctx); +void addTLVPass(PassManager &pm, const MachOLinkingContext &ctx); +void addCompactUnwindPass(PassManager &pm, const MachOLinkingContext &ctx); +void addObjCPass(PassManager &pm, const MachOLinkingContext &ctx); +void addShimPass(PassManager &pm, const MachOLinkingContext &ctx); + +} // namespace mach_o +} // namespace lld + +#endif // LLD_READER_WRITER_MACHO_PASSES_H diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/ObjCPass.cpp b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/ObjCPass.cpp new file mode 100644 index 00000000000..ba24b3fecdf --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/ObjCPass.cpp @@ -0,0 +1,128 @@ +//===- lib/ReaderWriter/MachO/ObjCPass.cpp -------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "File.h" +#include "MachOPasses.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/Reference.h" +#include "lld/Core/Simple.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" + +namespace lld { +namespace mach_o { + +/// +/// ObjC Image Info Atom created by the ObjC pass. 
+/// +class ObjCImageInfoAtom : public SimpleDefinedAtom { +public: + ObjCImageInfoAtom(const File &file, + MachOLinkingContext::ObjCConstraint objCConstraint, + uint32_t swiftVersion) + : SimpleDefinedAtom(file) { + + Data.info.version = 0; + + switch (objCConstraint) { + case MachOLinkingContext::objc_unknown: + llvm_unreachable("Shouldn't run the objc pass without a constraint"); + case MachOLinkingContext::objc_supports_gc: + case MachOLinkingContext::objc_gc_only: + llvm_unreachable("GC is not supported"); + case MachOLinkingContext::objc_retainReleaseForSimulator: + // The retain/release for simulator flag is already the correct + // encoded value for the data so just set it here. + Data.info.flags = (uint32_t)objCConstraint; + break; + case MachOLinkingContext::objc_retainRelease: + // We don't need to encode this flag, so just leave the flags as 0. + Data.info.flags = 0; + break; + } + + Data.info.flags |= (swiftVersion << 8); + } + + ~ObjCImageInfoAtom() override = default; + + ContentType contentType() const override { + return DefinedAtom::typeObjCImageInfo; + } + + Alignment alignment() const override { + return 4; + } + + uint64_t size() const override { + return 8; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permR__; + } + + ArrayRef<uint8_t> rawContent() const override { + return llvm::makeArrayRef(Data.bytes, size()); + } + +private: + + struct objc_image_info { + uint32_t version; + uint32_t flags; + }; + + union { + objc_image_info info; + uint8_t bytes[8]; + } Data; +}; + +class ObjCPass : public Pass { +public: + ObjCPass(const MachOLinkingContext &context) + : _ctx(context), + _file(*_ctx.make_file<MachOFile>("<mach-o objc pass>")) { + _file.setOrdinal(_ctx.getNextOrdinalAndIncrement()); + } + + llvm::Error perform(SimpleFile &mergedFile) override { + // Add the image info. 
+ mergedFile.addAtom(*getImageInfo()); + + return llvm::Error(); + } + +private: + + const DefinedAtom* getImageInfo() { + return new (_file.allocator()) ObjCImageInfoAtom(_file, + _ctx.objcConstraint(), + _ctx.swiftVersion()); + } + + const MachOLinkingContext &_ctx; + MachOFile &_file; +}; + + + +void addObjCPass(PassManager &pm, const MachOLinkingContext &ctx) { + pm.add(llvm::make_unique<ObjCPass>(ctx)); +} + +} // end namespace mach_o +} // end namespace lld diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/SectCreateFile.h b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/SectCreateFile.h new file mode 100644 index 00000000000..49e65f63151 --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/SectCreateFile.h @@ -0,0 +1,102 @@ +//===---- lib/ReaderWriter/MachO/SectCreateFile.h ---------------*- c++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_READER_WRITER_MACHO_SECTCREATE_FILE_H +#define LLD_READER_WRITER_MACHO_SECTCREATE_FILE_H + +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/Simple.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" + +namespace lld { +namespace mach_o { + +// +// A FlateNamespaceFile instance may be added as a resolution source of last +// resort, depending on how -flat_namespace and -undefined are set. 
+// +class SectCreateFile : public File { +public: + class SectCreateAtom : public SimpleDefinedAtom { + public: + SectCreateAtom(const File &file, StringRef segName, StringRef sectName, + std::unique_ptr<MemoryBuffer> content) + : SimpleDefinedAtom(file), + _combinedName((segName + "/" + sectName).str()), + _content(std::move(content)) {} + + ~SectCreateAtom() override = default; + + uint64_t size() const override { return _content->getBufferSize(); } + + Scope scope() const override { return scopeGlobal; } + + ContentType contentType() const override { return typeSectCreate; } + + SectionChoice sectionChoice() const override { return sectionCustomRequired; } + + StringRef customSectionName() const override { return _combinedName; } + + DeadStripKind deadStrip() const override { return deadStripNever; } + + ArrayRef<uint8_t> rawContent() const override { + const uint8_t *data = + reinterpret_cast<const uint8_t*>(_content->getBufferStart()); + return ArrayRef<uint8_t>(data, _content->getBufferSize()); + } + + StringRef segmentName() const { return _segName; } + StringRef sectionName() const { return _sectName; } + + private: + std::string _combinedName; + StringRef _segName; + StringRef _sectName; + std::unique_ptr<MemoryBuffer> _content; + }; + + SectCreateFile() : File("sectcreate", kindSectCreateObject) {} + + void addSection(StringRef seg, StringRef sect, + std::unique_ptr<MemoryBuffer> content) { + _definedAtoms.push_back( + new (allocator()) SectCreateAtom(*this, seg, sect, std::move(content))); + } + + const AtomRange<DefinedAtom> defined() const override { + return _definedAtoms; + } + + const AtomRange<UndefinedAtom> undefined() const override { + return _noUndefinedAtoms; + } + + const AtomRange<SharedLibraryAtom> sharedLibrary() const override { + return _noSharedLibraryAtoms; + } + + const AtomRange<AbsoluteAtom> absolute() const override { + return _noAbsoluteAtoms; + } + + void clearAtoms() override { + _definedAtoms.clear(); + 
_noUndefinedAtoms.clear(); + _noSharedLibraryAtoms.clear(); + _noAbsoluteAtoms.clear(); + } + +private: + AtomVector<DefinedAtom> _definedAtoms; +}; + +} // namespace mach_o +} // namespace lld + +#endif // LLD_READER_WRITER_MACHO_SECTCREATE_FILE_H diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/ShimPass.cpp b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/ShimPass.cpp new file mode 100644 index 00000000000..cd536714665 --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/ShimPass.cpp @@ -0,0 +1,129 @@ +//===- lib/ReaderWriter/MachO/ShimPass.cpp -------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This linker pass updates branch-sites whose target is a different mode +// (thumb vs arm). +// +// Arm code has two instruction encodings thumb and arm. When branching from +// one code encoding to another, you need to use an instruction that switches +// the instruction mode. Usually the transition only happens at call sites, and +// the linker can transform a BL instruction in BLX (or vice versa). But if the +// compiler did a tail call optimization and a function ends with a branch (not +// branch and link), there is no pc-rel BX instruction. +// +// The ShimPass looks for pc-rel B instructions that will need to switch mode. +// For those cases it synthesizes a shim which does the transition, then +// modifies the original atom with the B instruction to target to the shim atom. 
+// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "File.h" +#include "MachOPasses.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/Reference.h" +#include "lld/Core/Simple.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" + +namespace lld { +namespace mach_o { + +class ShimPass : public Pass { +public: + ShimPass(const MachOLinkingContext &context) + : _ctx(context), _archHandler(_ctx.archHandler()), + _stubInfo(_archHandler.stubInfo()), + _file(*_ctx.make_file<MachOFile>("<mach-o shim pass>")) { + _file.setOrdinal(_ctx.getNextOrdinalAndIncrement()); + } + + llvm::Error perform(SimpleFile &mergedFile) override { + // Scan all references in all atoms. + for (const DefinedAtom *atom : mergedFile.defined()) { + for (const Reference *ref : *atom) { + // Look at non-call branches. + if (!_archHandler.isNonCallBranch(*ref)) + continue; + const Atom *target = ref->target(); + assert(target != nullptr); + if (const lld::DefinedAtom *daTarget = dyn_cast<DefinedAtom>(target)) { + bool atomIsThumb = _archHandler.isThumbFunction(*atom); + bool targetIsThumb = _archHandler.isThumbFunction(*daTarget); + if (atomIsThumb != targetIsThumb) + updateBranchToUseShim(atomIsThumb, *daTarget, ref); + } + } + } + // Exit early if no shims needed. + if (_targetToShim.empty()) + return llvm::Error(); + + // Sort shim atoms so the layout order is stable. + std::vector<const DefinedAtom *> shims; + shims.reserve(_targetToShim.size()); + for (auto element : _targetToShim) { + shims.push_back(element.second); + } + std::sort(shims.begin(), shims.end(), + [](const DefinedAtom *l, const DefinedAtom *r) { + return (l->name() < r->name()); + }); + + // Add all shims to master file. 
+ for (const DefinedAtom *shim : shims) + mergedFile.addAtom(*shim); + + return llvm::Error(); + } + +private: + + void updateBranchToUseShim(bool thumbToArm, const DefinedAtom& target, + const Reference *ref) { + // Make file-format specific stub and other support atoms. + const DefinedAtom *shim = this->getShim(thumbToArm, target); + assert(shim != nullptr); + // Switch branch site to target shim atom. + const_cast<Reference *>(ref)->setTarget(shim); + } + + const DefinedAtom* getShim(bool thumbToArm, const DefinedAtom& target) { + auto pos = _targetToShim.find(&target); + if ( pos != _targetToShim.end() ) { + // Reuse an existing shim. + assert(pos->second != nullptr); + return pos->second; + } else { + // There is no existing shim, so create a new one. + const DefinedAtom *shim = _archHandler.createShim(_file, thumbToArm, + target); + _targetToShim[&target] = shim; + return shim; + } + } + + const MachOLinkingContext &_ctx; + mach_o::ArchHandler &_archHandler; + const ArchHandler::StubInfo &_stubInfo; + MachOFile &_file; + llvm::DenseMap<const Atom*, const DefinedAtom*> _targetToShim; +}; + + + +void addShimPass(PassManager &pm, const MachOLinkingContext &ctx) { + pm.add(llvm::make_unique<ShimPass>(ctx)); +} + +} // end namespace mach_o +} // end namespace lld diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/StubsPass.cpp b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/StubsPass.cpp new file mode 100644 index 00000000000..d53b78b24d1 --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/StubsPass.cpp @@ -0,0 +1,379 @@ +//===- lib/ReaderWriter/MachO/StubsPass.cpp ---------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This linker pass updates call-sites which have references to shared library +// atoms to instead have a reference to a stub (PLT entry) for the specified +// symbol. Each file format defines a subclass of StubsPass which implements +// the abstract methods for creating the file format specific StubAtoms. +// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "File.h" +#include "MachOPasses.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/File.h" +#include "lld/Core/LLVM.h" +#include "lld/Core/Reference.h" +#include "lld/Core/Simple.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" + +namespace lld { +namespace mach_o { + +// +// Lazy Pointer Atom created by the stubs pass. +// +class LazyPointerAtom : public SimpleDefinedAtom { +public: + LazyPointerAtom(const File &file, bool is64) + : SimpleDefinedAtom(file), _is64(is64) { } + + ~LazyPointerAtom() override = default; + + ContentType contentType() const override { + return DefinedAtom::typeLazyPointer; + } + + Alignment alignment() const override { + return _is64 ? 8 : 4; + } + + uint64_t size() const override { + return _is64 ? 8 : 4; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permRW_; + } + + ArrayRef<uint8_t> rawContent() const override { + static const uint8_t zeros[] = + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + return llvm::makeArrayRef(zeros, size()); + } + +private: + const bool _is64; +}; + +// +// NonLazyPointer (GOT) Atom created by the stubs pass. 
+// +class NonLazyPointerAtom : public SimpleDefinedAtom { +public: + NonLazyPointerAtom(const File &file, bool is64, ContentType contentType) + : SimpleDefinedAtom(file), _is64(is64), _contentType(contentType) { } + + ~NonLazyPointerAtom() override = default; + + ContentType contentType() const override { + return _contentType; + } + + Alignment alignment() const override { + return _is64 ? 8 : 4; + } + + uint64_t size() const override { + return _is64 ? 8 : 4; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permRW_; + } + + ArrayRef<uint8_t> rawContent() const override { + static const uint8_t zeros[] = + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + return llvm::makeArrayRef(zeros, size()); + } + +private: + const bool _is64; + const ContentType _contentType; +}; + +// +// Stub Atom created by the stubs pass. +// +class StubAtom : public SimpleDefinedAtom { +public: + StubAtom(const File &file, const ArchHandler::StubInfo &stubInfo) + : SimpleDefinedAtom(file), _stubInfo(stubInfo){ } + + ~StubAtom() override = default; + + ContentType contentType() const override { + return DefinedAtom::typeStub; + } + + Alignment alignment() const override { + return 1 << _stubInfo.codeAlignment; + } + + uint64_t size() const override { + return _stubInfo.stubSize; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permR_X; + } + + ArrayRef<uint8_t> rawContent() const override { + return llvm::makeArrayRef(_stubInfo.stubBytes, _stubInfo.stubSize); + } + +private: + const ArchHandler::StubInfo &_stubInfo; +}; + +// +// Stub Helper Atom created by the stubs pass. 
+// +class StubHelperAtom : public SimpleDefinedAtom { +public: + StubHelperAtom(const File &file, const ArchHandler::StubInfo &stubInfo) + : SimpleDefinedAtom(file), _stubInfo(stubInfo) { } + + ~StubHelperAtom() override = default; + + ContentType contentType() const override { + return DefinedAtom::typeStubHelper; + } + + Alignment alignment() const override { + return 1 << _stubInfo.codeAlignment; + } + + uint64_t size() const override { + return _stubInfo.stubHelperSize; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permR_X; + } + + ArrayRef<uint8_t> rawContent() const override { + return llvm::makeArrayRef(_stubInfo.stubHelperBytes, + _stubInfo.stubHelperSize); + } + +private: + const ArchHandler::StubInfo &_stubInfo; +}; + +// +// Stub Helper Common Atom created by the stubs pass. +// +class StubHelperCommonAtom : public SimpleDefinedAtom { +public: + StubHelperCommonAtom(const File &file, const ArchHandler::StubInfo &stubInfo) + : SimpleDefinedAtom(file), _stubInfo(stubInfo) { } + + ~StubHelperCommonAtom() override = default; + + ContentType contentType() const override { + return DefinedAtom::typeStubHelper; + } + + Alignment alignment() const override { + return 1 << _stubInfo.stubHelperCommonAlignment; + } + + uint64_t size() const override { + return _stubInfo.stubHelperCommonSize; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permR_X; + } + + ArrayRef<uint8_t> rawContent() const override { + return llvm::makeArrayRef(_stubInfo.stubHelperCommonBytes, + _stubInfo.stubHelperCommonSize); + } + +private: + const ArchHandler::StubInfo &_stubInfo; +}; + +class StubsPass : public Pass { +public: + StubsPass(const MachOLinkingContext &context) + : _ctx(context), _archHandler(_ctx.archHandler()), + _stubInfo(_archHandler.stubInfo()), + _file(*_ctx.make_file<MachOFile>("<mach-o Stubs pass>")) { + _file.setOrdinal(_ctx.getNextOrdinalAndIncrement()); + } + + llvm::Error perform(SimpleFile 
&mergedFile) override { + // Skip this pass if output format uses text relocations instead of stubs. + if (!this->noTextRelocs()) + return llvm::Error(); + + // Scan all references in all atoms. + for (const DefinedAtom *atom : mergedFile.defined()) { + for (const Reference *ref : *atom) { + // Look at call-sites. + if (!this->isCallSite(*ref)) + continue; + const Atom *target = ref->target(); + assert(target != nullptr); + if (isa<SharedLibraryAtom>(target)) { + // Calls to shared libraries go through stubs. + _targetToUses[target].push_back(ref); + continue; + } + const DefinedAtom *defTarget = dyn_cast<DefinedAtom>(target); + if (defTarget && defTarget->interposable() != DefinedAtom::interposeNo){ + // Calls to interposable functions in same linkage unit must also go + // through a stub. + assert(defTarget->scope() != DefinedAtom::scopeTranslationUnit); + _targetToUses[target].push_back(ref); + } + } + } + + // Exit early if no stubs needed. + if (_targetToUses.empty()) + return llvm::Error(); + + // First add help-common and GOT slots used by lazy binding. 
+ SimpleDefinedAtom *helperCommonAtom = + new (_file.allocator()) StubHelperCommonAtom(_file, _stubInfo); + SimpleDefinedAtom *helperCacheNLPAtom = + new (_file.allocator()) NonLazyPointerAtom(_file, _ctx.is64Bit(), + _stubInfo.stubHelperImageCacheContentType); + SimpleDefinedAtom *helperBinderNLPAtom = + new (_file.allocator()) NonLazyPointerAtom(_file, _ctx.is64Bit(), + _stubInfo.stubHelperImageCacheContentType); + addReference(helperCommonAtom, _stubInfo.stubHelperCommonReferenceToCache, + helperCacheNLPAtom); + addOptReference( + helperCommonAtom, _stubInfo.stubHelperCommonReferenceToCache, + _stubInfo.optStubHelperCommonReferenceToCache, helperCacheNLPAtom); + addReference(helperCommonAtom, _stubInfo.stubHelperCommonReferenceToBinder, + helperBinderNLPAtom); + addOptReference( + helperCommonAtom, _stubInfo.stubHelperCommonReferenceToBinder, + _stubInfo.optStubHelperCommonReferenceToBinder, helperBinderNLPAtom); + mergedFile.addAtom(*helperCommonAtom); + mergedFile.addAtom(*helperBinderNLPAtom); + mergedFile.addAtom(*helperCacheNLPAtom); + + // Add reference to dyld_stub_binder in libSystem.dylib + auto I = std::find_if( + mergedFile.sharedLibrary().begin(), mergedFile.sharedLibrary().end(), + [&](const SharedLibraryAtom *atom) { + return atom->name().equals(_stubInfo.binderSymbolName); + }); + assert(I != mergedFile.sharedLibrary().end() && + "dyld_stub_binder not found"); + addReference(helperBinderNLPAtom, _stubInfo.nonLazyPointerReferenceToBinder, *I); + + // Sort targets by name, so stubs and lazy pointers are consistent + std::vector<const Atom *> targetsNeedingStubs; + for (auto it : _targetToUses) + targetsNeedingStubs.push_back(it.first); + std::sort(targetsNeedingStubs.begin(), targetsNeedingStubs.end(), + [](const Atom * left, const Atom * right) { + return (left->name().compare(right->name()) < 0); + }); + + // Make and append stubs, lazy pointers, and helpers in alphabetical order. 
+ unsigned lazyOffset = 0; + for (const Atom *target : targetsNeedingStubs) { + auto *stub = new (_file.allocator()) StubAtom(_file, _stubInfo); + auto *lp = + new (_file.allocator()) LazyPointerAtom(_file, _ctx.is64Bit()); + auto *helper = new (_file.allocator()) StubHelperAtom(_file, _stubInfo); + + addReference(stub, _stubInfo.stubReferenceToLP, lp); + addOptReference(stub, _stubInfo.stubReferenceToLP, + _stubInfo.optStubReferenceToLP, lp); + addReference(lp, _stubInfo.lazyPointerReferenceToHelper, helper); + addReference(lp, _stubInfo.lazyPointerReferenceToFinal, target); + addReference(helper, _stubInfo.stubHelperReferenceToImm, helper); + addReferenceAddend(helper, _stubInfo.stubHelperReferenceToImm, helper, + lazyOffset); + addReference(helper, _stubInfo.stubHelperReferenceToHelperCommon, + helperCommonAtom); + + mergedFile.addAtom(*stub); + mergedFile.addAtom(*lp); + mergedFile.addAtom(*helper); + + // Update each reference to use stub. + for (const Reference *ref : _targetToUses[target]) { + assert(ref->target() == target); + // Switch call site to reference stub atom instead. 
+ const_cast<Reference *>(ref)->setTarget(stub); + } + + // Calculate new offset + lazyOffset += target->name().size() + 12; + } + + return llvm::Error(); + } + +private: + bool noTextRelocs() { + return true; + } + + bool isCallSite(const Reference &ref) { + return _archHandler.isCallSite(ref); + } + + void addReference(SimpleDefinedAtom* atom, + const ArchHandler::ReferenceInfo &refInfo, + const lld::Atom* target) { + atom->addReference(Reference::KindNamespace::mach_o, + refInfo.arch, refInfo.kind, refInfo.offset, + target, refInfo.addend); + } + + void addReferenceAddend(SimpleDefinedAtom *atom, + const ArchHandler::ReferenceInfo &refInfo, + const lld::Atom *target, uint64_t addend) { + atom->addReference(Reference::KindNamespace::mach_o, refInfo.arch, + refInfo.kind, refInfo.offset, target, addend); + } + + void addOptReference(SimpleDefinedAtom* atom, + const ArchHandler::ReferenceInfo &refInfo, + const ArchHandler::OptionalRefInfo &optRef, + const lld::Atom* target) { + if (!optRef.used) + return; + atom->addReference(Reference::KindNamespace::mach_o, + refInfo.arch, optRef.kind, optRef.offset, + target, optRef.addend); + } + + typedef llvm::DenseMap<const Atom*, + llvm::SmallVector<const Reference *, 8>> TargetToUses; + + const MachOLinkingContext &_ctx; + mach_o::ArchHandler &_archHandler; + const ArchHandler::StubInfo &_stubInfo; + MachOFile &_file; + TargetToUses _targetToUses; +}; + +void addStubsPass(PassManager &pm, const MachOLinkingContext &ctx) { + pm.add(std::unique_ptr<Pass>(new StubsPass(ctx))); +} + +} // end namespace mach_o +} // end namespace lld diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/TLVPass.cpp b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/TLVPass.cpp new file mode 100644 index 00000000000..7a8496c20a4 --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/TLVPass.cpp @@ -0,0 +1,141 @@ +//===- lib/ReaderWriter/MachO/TLVPass.cpp -----------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is 
distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This linker pass transforms all TLV references to real references. +/// +//===----------------------------------------------------------------------===// + +#include "ArchHandler.h" +#include "File.h" +#include "MachOPasses.h" +#include "lld/Core/Simple.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Debug.h" + +namespace lld { +namespace mach_o { + +// +// TLVP Entry Atom created by the TLV pass. +// +class TLVPEntryAtom : public SimpleDefinedAtom { +public: + TLVPEntryAtom(const File &file, bool is64, StringRef name) + : SimpleDefinedAtom(file), _is64(is64), _name(name) {} + + ~TLVPEntryAtom() override = default; + + ContentType contentType() const override { + return DefinedAtom::typeTLVInitializerPtr; + } + + Alignment alignment() const override { + return _is64 ? 8 : 4; + } + + uint64_t size() const override { + return _is64 ? 
8 : 4; + } + + ContentPermissions permissions() const override { + return DefinedAtom::permRW_; + } + + ArrayRef<uint8_t> rawContent() const override { + static const uint8_t zeros[] = + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + return llvm::makeArrayRef(zeros, size()); + } + + StringRef slotName() const { + return _name; + } + +private: + const bool _is64; + StringRef _name; +}; + +class TLVPass : public Pass { +public: + TLVPass(const MachOLinkingContext &context) + : _ctx(context), _archHandler(_ctx.archHandler()), + _file(*_ctx.make_file<MachOFile>("<mach-o TLV pass>")) { + _file.setOrdinal(_ctx.getNextOrdinalAndIncrement()); + } + +private: + llvm::Error perform(SimpleFile &mergedFile) override { + bool allowTLV = _ctx.minOS("10.7", "1.0"); + + for (const DefinedAtom *atom : mergedFile.defined()) { + for (const Reference *ref : *atom) { + if (!_archHandler.isTLVAccess(*ref)) + continue; + + if (!allowTLV) + return llvm::make_error<GenericError>( + "targeted OS version does not support use of thread local " + "variables in " + atom->name() + " for architecture " + + _ctx.archName()); + + const Atom *target = ref->target(); + assert(target != nullptr); + + const DefinedAtom *tlvpEntry = makeTLVPEntry(target); + const_cast<Reference*>(ref)->setTarget(tlvpEntry); + _archHandler.updateReferenceToTLV(ref); + } + } + + std::vector<const TLVPEntryAtom*> entries; + entries.reserve(_targetToTLVP.size()); + for (auto &it : _targetToTLVP) + entries.push_back(it.second); + std::sort(entries.begin(), entries.end(), + [](const TLVPEntryAtom *lhs, const TLVPEntryAtom *rhs) { + return (lhs->slotName().compare(rhs->slotName()) < 0); + }); + + for (const TLVPEntryAtom *slot : entries) + mergedFile.addAtom(*slot); + + return llvm::Error(); + } + + const DefinedAtom *makeTLVPEntry(const Atom *target) { + auto pos = _targetToTLVP.find(target); + + if (pos != _targetToTLVP.end()) + return pos->second; + + auto *tlvpEntry = new (_file.allocator()) + TLVPEntryAtom(_file, 
_ctx.is64Bit(), target->name()); + _targetToTLVP[target] = tlvpEntry; + const ArchHandler::ReferenceInfo &nlInfo = + _archHandler.stubInfo().nonLazyPointerReferenceToBinder; + tlvpEntry->addReference(Reference::KindNamespace::mach_o, nlInfo.arch, + nlInfo.kind, 0, target, 0); + return tlvpEntry; + } + + const MachOLinkingContext &_ctx; + mach_o::ArchHandler &_archHandler; + MachOFile &_file; + llvm::DenseMap<const Atom*, const TLVPEntryAtom*> _targetToTLVP; +}; + +void addTLVPass(PassManager &pm, const MachOLinkingContext &ctx) { + assert(ctx.needsTLVPass()); + pm.add(llvm::make_unique<TLVPass>(ctx)); +} + +} // end namespace mach_o +} // end namespace lld diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/WriterMachO.cpp b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/WriterMachO.cpp new file mode 100644 index 00000000000..f08487f21ac --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/MachO/WriterMachO.cpp @@ -0,0 +1,71 @@ +//===- lib/ReaderWriter/MachO/WriterMachO.cpp -----------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "ExecutableAtoms.h" +#include "MachONormalizedFile.h" +#include "lld/Core/File.h" +#include "lld/Core/Writer.h" +#include "lld/ReaderWriter/MachOLinkingContext.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/MachO.h" +#include "llvm/Support/raw_ostream.h" +#include <system_error> + +using lld::mach_o::normalized::NormalizedFile; + +namespace lld { +namespace mach_o { + +class MachOWriter : public Writer { +public: + MachOWriter(const MachOLinkingContext &ctxt) : _ctx(ctxt) {} + + llvm::Error writeFile(const lld::File &file, StringRef path) override { + // Construct empty normalized file from atoms. 
+ llvm::Expected<std::unique_ptr<NormalizedFile>> nFile = + normalized::normalizedFromAtoms(file, _ctx); + if (auto ec = nFile.takeError()) + return ec; + + // For testing, write out yaml form of normalized file. + if (_ctx.printAtoms()) { + std::unique_ptr<Writer> yamlWriter = createWriterYAML(_ctx); + if (auto ec = yamlWriter->writeFile(file, "-")) + return ec; + } + + // Write normalized file as mach-o binary. + return writeBinary(*nFile->get(), path); + } + + void createImplicitFiles(std::vector<std::unique_ptr<File>> &r) override { + // When building main executables, add _main as required entry point. + if (_ctx.outputTypeHasEntry()) + r.emplace_back(new CEntryFile(_ctx)); + // If this can link with dylibs, need helper function (dyld_stub_binder). + if (_ctx.needsStubsPass()) + r.emplace_back(new StubHelperFile(_ctx)); + // Final linked images can access a symbol for their mach_header. + if (_ctx.outputMachOType() != llvm::MachO::MH_OBJECT) + r.emplace_back(new MachHeaderAliasFile(_ctx)); + } +private: + const MachOLinkingContext &_ctx; + }; + + +} // namespace mach_o + +std::unique_ptr<Writer> createWriterMachO(const MachOLinkingContext &context) { + return std::unique_ptr<Writer>(new lld::mach_o::MachOWriter(context)); +} + +} // namespace lld diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/YAML/CMakeLists.txt b/gnu/llvm/tools/lld/lib/ReaderWriter/YAML/CMakeLists.txt new file mode 100644 index 00000000000..5c25444e5db --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/YAML/CMakeLists.txt @@ -0,0 +1,6 @@ +add_lld_library(lldYAML + ReaderWriterYAML.cpp + LINK_LIBS + lldCore + LLVMSupport + ) diff --git a/gnu/llvm/tools/lld/lib/ReaderWriter/YAML/ReaderWriterYAML.cpp b/gnu/llvm/tools/lld/lib/ReaderWriter/YAML/ReaderWriterYAML.cpp new file mode 100644 index 00000000000..ee2a9ec1088 --- /dev/null +++ b/gnu/llvm/tools/lld/lib/ReaderWriter/YAML/ReaderWriterYAML.cpp @@ -0,0 +1,1399 @@ +//===- lib/ReaderWriter/YAML/ReaderWriterYAML.cpp 
-------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "lld/Core/AbsoluteAtom.h" +#include "lld/Core/ArchiveLibraryFile.h" +#include "lld/Core/Atom.h" +#include "lld/Core/DefinedAtom.h" +#include "lld/Core/Error.h" +#include "lld/Core/File.h" +#include "lld/Core/LinkingContext.h" +#include "lld/Core/Reader.h" +#include "lld/Core/Reference.h" +#include "lld/Core/SharedLibraryAtom.h" +#include "lld/Core/Simple.h" +#include "lld/Core/UndefinedAtom.h" +#include "lld/Core/Writer.h" +#include "lld/ReaderWriter/YamlContext.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" +#include <cassert> +#include <cstdint> +#include <cstring> +#include <memory> +#include <string> +#include <system_error> +#include <vector> + +using llvm::yaml::MappingTraits; +using llvm::yaml::ScalarEnumerationTraits; +using llvm::yaml::ScalarTraits; +using llvm::yaml::IO; +using llvm::yaml::SequenceTraits; +using llvm::yaml::DocumentListTraits; + +using namespace lld; + +/// The conversion of Atoms to and from YAML uses LLVM's YAML I/O. This +/// file just defines template specializations on the lld types which control +/// how the mapping is done to and from YAML. + +namespace { + +/// Used when writing yaml files. +/// In most cases, atom names are unambiguous, so references can just +/// use the atom name as the target (e.g. target: foo). 
But in a few +/// cases that does not work, so ref-names are added. These are labels +/// used only in yaml. The labels do not exist in the Atom model. +/// +/// One need for ref-names is when atoms have no user supplied name +/// (e.g. c-string literal). Another case is when two object files with +/// identically named static functions are merged (ld -r) into one object file. +/// In that case referencing the function by name is ambiguous, so a unique +/// ref-name is added. +class RefNameBuilder { +public: + RefNameBuilder(const lld::File &file) + : _collisionCount(0), _unnamedCounter(0) { + // visit all atoms + for (const lld::DefinedAtom *atom : file.defined()) { + // Build map of atom names to detect duplicates + if (!atom->name().empty()) + buildDuplicateNameMap(*atom); + + // Find references to unnamed atoms and create ref-names for them. + for (const lld::Reference *ref : *atom) { + // create refname for any unnamed reference target + const lld::Atom *target = ref->target(); + if ((target != nullptr) && target->name().empty()) { + std::string storage; + llvm::raw_string_ostream buffer(storage); + buffer << llvm::format("L%03d", _unnamedCounter++); + StringRef newName = copyString(buffer.str()); + _refNames[target] = newName; + DEBUG_WITH_TYPE("WriterYAML", + llvm::dbgs() << "unnamed atom: creating ref-name: '" + << newName << "' (" + << (const void *)newName.data() << ", " + << newName.size() << ")\n"); + } + } + } + for (const lld::UndefinedAtom *undefAtom : file.undefined()) { + buildDuplicateNameMap(*undefAtom); + } + for (const lld::SharedLibraryAtom *shlibAtom : file.sharedLibrary()) { + buildDuplicateNameMap(*shlibAtom); + } + for (const lld::AbsoluteAtom *absAtom : file.absolute()) { + if (!absAtom->name().empty()) + buildDuplicateNameMap(*absAtom); + } + } + + /// Record the (non-empty) name of \p atom. On a name collision, assign a + /// unique ref-name to this atom and, if it has none yet, to the previously + /// seen atom of the same name. + void buildDuplicateNameMap(const lld::Atom &atom) { + assert(!atom.name().empty()); + NameToAtom::iterator pos = _nameMap.find(atom.name()); + if (pos != _nameMap.end()) { + // Found name 
collision, give each a unique ref-name. + std::string Storage; + llvm::raw_string_ostream buffer(Storage); + buffer << atom.name() << llvm::format(".%03d", ++_collisionCount); + StringRef newName = copyString(buffer.str()); + _refNames[&atom] = newName; + DEBUG_WITH_TYPE("WriterYAML", + llvm::dbgs() << "name collsion: creating ref-name: '" + << newName << "' (" + << (const void *)newName.data() + << ", " << newName.size() << ")\n"); + const lld::Atom *prevAtom = pos->second; + AtomToRefName::iterator pos2 = _refNames.find(prevAtom); + if (pos2 == _refNames.end()) { + // Only create ref-name for previous if none already created. + std::string Storage2; + llvm::raw_string_ostream buffer2(Storage2); + buffer2 << prevAtom->name() << llvm::format(".%03d", ++_collisionCount); + StringRef newName2 = copyString(buffer2.str()); + _refNames[prevAtom] = newName2; + DEBUG_WITH_TYPE("WriterYAML", + llvm::dbgs() << "name collsion: creating ref-name: '" + << newName2 << "' (" + << (const void *)newName2.data() << ", " + << newName2.size() << ")\n"); + } + } else { + // First time we've seen this name, just add it to map. + _nameMap[atom.name()] = &atom; + DEBUG_WITH_TYPE("WriterYAML", llvm::dbgs() + << "atom name seen for first time: '" + << atom.name() << "' (" + << (const void *)atom.name().data() + << ", " << atom.name().size() << ")\n"); + } + } + + bool hasRefName(const lld::Atom *atom) { return _refNames.count(atom); } + + StringRef refName(const lld::Atom *atom) { + return _refNames.find(atom)->second; + } + +private: + typedef llvm::StringMap<const lld::Atom *> NameToAtom; + typedef llvm::DenseMap<const lld::Atom *, std::string> AtomToRefName; + + // Allocate a new copy of this string in _storage, so the strings + // can be freed when RefNameBuilder is destroyed. 
+ StringRef copyString(StringRef str) { + char *s = _storage.Allocate<char>(str.size()); + memcpy(s, str.data(), str.size()); + return StringRef(s, str.size()); + } + + unsigned int _collisionCount; + unsigned int _unnamedCounter; + NameToAtom _nameMap; + AtomToRefName _refNames; + llvm::BumpPtrAllocator _storage; +}; + +/// Used when reading yaml files to find the target of a reference +/// that could be a name or ref-name. +class RefNameResolver { +public: + RefNameResolver(const lld::File *file, IO &io); + + const lld::Atom *lookup(StringRef name) const { + NameToAtom::const_iterator pos = _nameMap.find(name); + if (pos != _nameMap.end()) + return pos->second; + _io.setError(Twine("no such atom name: ") + name); + return nullptr; + } + +private: + typedef llvm::StringMap<const lld::Atom *> NameToAtom; + + void add(StringRef name, const lld::Atom *atom) { + if (_nameMap.count(name)) { + _io.setError(Twine("duplicate atom name: ") + name); + } else { + _nameMap[name] = atom; + } + } + + IO &_io; + NameToAtom _nameMap; +}; + +/// Mapping of Atoms. +template <typename T> class AtomList { + using Ty = std::vector<OwningAtomPtr<T>>; + +public: + typename Ty::iterator begin() { return _atoms.begin(); } + typename Ty::iterator end() { return _atoms.end(); } + Ty _atoms; +}; + +/// Mapping of kind: field in yaml files. +enum FileKinds { + fileKindObjectAtoms, // atom based object file encoded in yaml + fileKindArchive, // static archive library encoded in yaml + fileKindObjectMachO // mach-o object files encoded in yaml +}; + +struct ArchMember { + FileKinds _kind; + StringRef _name; + const lld::File *_content; +}; + +// The content bytes in a DefinedAtom are just uint8_t but we want +// special formatting, so define a strong type. +LLVM_YAML_STRONG_TYPEDEF(uint8_t, ImplicitHex8) + +// SharedLibraryAtoms have a bool canBeNull() method which we'd like to be +// more readable than just true/false. 
+LLVM_YAML_STRONG_TYPEDEF(bool, ShlibCanBeNull) + +// lld::Reference::Kind is a tuple of <namespace, arch, value>. +// For yaml, we just want one string that encapsulates the tuple. +struct RefKind { + Reference::KindNamespace ns; + Reference::KindArch arch; + Reference::KindValue value; +}; + +} // end anonymous namespace + +LLVM_YAML_IS_SEQUENCE_VECTOR(ArchMember) +LLVM_YAML_IS_SEQUENCE_VECTOR(const lld::Reference *) +// Always write DefinedAtoms content bytes as a flow sequence. +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(ImplicitHex8) +// for compatibility with gcc-4.7 in C++11 mode, add extra namespace +namespace llvm { +namespace yaml { + +// This is a custom formatter for RefKind +template <> struct ScalarTraits<RefKind> { + static void output(const RefKind &kind, void *ctxt, raw_ostream &out) { + assert(ctxt != nullptr); + YamlContext *info = reinterpret_cast<YamlContext *>(ctxt); + assert(info->_registry); + StringRef str; + if (info->_registry->referenceKindToString(kind.ns, kind.arch, kind.value, + str)) + out << str; + else + out << (int)(kind.ns) << "-" << (int)(kind.arch) << "-" << kind.value; + } + + static StringRef input(StringRef scalar, void *ctxt, RefKind &kind) { + assert(ctxt != nullptr); + YamlContext *info = reinterpret_cast<YamlContext *>(ctxt); + assert(info->_registry); + if (info->_registry->referenceKindFromString(scalar, kind.ns, kind.arch, + kind.value)) + return StringRef(); + return StringRef("unknown reference kind"); + } + + static bool mustQuote(StringRef) { return false; } +}; + +template <> struct ScalarEnumerationTraits<lld::File::Kind> { + static void enumeration(IO &io, lld::File::Kind &value) { + io.enumCase(value, "error-object", lld::File::kindErrorObject); + io.enumCase(value, "object", lld::File::kindMachObject); + io.enumCase(value, "shared-library", lld::File::kindSharedLibrary); + io.enumCase(value, "static-library", lld::File::kindArchiveLibrary); + } +}; + +template <> struct ScalarEnumerationTraits<lld::Atom::Scope> { + 
static void enumeration(IO &io, lld::Atom::Scope &value) { + io.enumCase(value, "global", lld::Atom::scopeGlobal); + io.enumCase(value, "hidden", lld::Atom::scopeLinkageUnit); + io.enumCase(value, "static", lld::Atom::scopeTranslationUnit); + } +}; + +template <> struct ScalarEnumerationTraits<lld::DefinedAtom::SectionChoice> { + static void enumeration(IO &io, lld::DefinedAtom::SectionChoice &value) { + io.enumCase(value, "content", lld::DefinedAtom::sectionBasedOnContent); + io.enumCase(value, "custom", lld::DefinedAtom::sectionCustomPreferred); + io.enumCase(value, "custom-required", + lld::DefinedAtom::sectionCustomRequired); + } +}; + +template <> struct ScalarEnumerationTraits<lld::DefinedAtom::Interposable> { + static void enumeration(IO &io, lld::DefinedAtom::Interposable &value) { + io.enumCase(value, "no", DefinedAtom::interposeNo); + io.enumCase(value, "yes", DefinedAtom::interposeYes); + io.enumCase(value, "yes-and-weak", DefinedAtom::interposeYesAndRuntimeWeak); + } +}; + +template <> struct ScalarEnumerationTraits<lld::DefinedAtom::Merge> { + static void enumeration(IO &io, lld::DefinedAtom::Merge &value) { + io.enumCase(value, "no", lld::DefinedAtom::mergeNo); + io.enumCase(value, "as-tentative", lld::DefinedAtom::mergeAsTentative); + io.enumCase(value, "as-weak", lld::DefinedAtom::mergeAsWeak); + io.enumCase(value, "as-addressed-weak", + lld::DefinedAtom::mergeAsWeakAndAddressUsed); + io.enumCase(value, "by-content", lld::DefinedAtom::mergeByContent); + io.enumCase(value, "same-name-and-size", + lld::DefinedAtom::mergeSameNameAndSize); + io.enumCase(value, "largest", lld::DefinedAtom::mergeByLargestSection); + } +}; + +template <> struct ScalarEnumerationTraits<lld::DefinedAtom::DeadStripKind> { + static void enumeration(IO &io, lld::DefinedAtom::DeadStripKind &value) { + io.enumCase(value, "normal", lld::DefinedAtom::deadStripNormal); + io.enumCase(value, "never", lld::DefinedAtom::deadStripNever); + io.enumCase(value, "always", 
lld::DefinedAtom::deadStripAlways); + } +}; + +template <> struct ScalarEnumerationTraits<lld::DefinedAtom::DynamicExport> { + static void enumeration(IO &io, lld::DefinedAtom::DynamicExport &value) { + io.enumCase(value, "normal", lld::DefinedAtom::dynamicExportNormal); + io.enumCase(value, "always", lld::DefinedAtom::dynamicExportAlways); + } +}; + +template <> struct ScalarEnumerationTraits<lld::DefinedAtom::CodeModel> { + static void enumeration(IO &io, lld::DefinedAtom::CodeModel &value) { + io.enumCase(value, "none", lld::DefinedAtom::codeNA); + io.enumCase(value, "mips-pic", lld::DefinedAtom::codeMipsPIC); + io.enumCase(value, "mips-micro", lld::DefinedAtom::codeMipsMicro); + io.enumCase(value, "mips-micro-pic", lld::DefinedAtom::codeMipsMicroPIC); + io.enumCase(value, "mips-16", lld::DefinedAtom::codeMips16); + io.enumCase(value, "arm-thumb", lld::DefinedAtom::codeARMThumb); + io.enumCase(value, "arm-a", lld::DefinedAtom::codeARM_a); + io.enumCase(value, "arm-d", lld::DefinedAtom::codeARM_d); + io.enumCase(value, "arm-t", lld::DefinedAtom::codeARM_t); + } +}; + +template <> +struct ScalarEnumerationTraits<lld::DefinedAtom::ContentPermissions> { + static void enumeration(IO &io, lld::DefinedAtom::ContentPermissions &value) { + io.enumCase(value, "---", lld::DefinedAtom::perm___); + io.enumCase(value, "r--", lld::DefinedAtom::permR__); + io.enumCase(value, "r-x", lld::DefinedAtom::permR_X); + io.enumCase(value, "rw-", lld::DefinedAtom::permRW_); + io.enumCase(value, "rwx", lld::DefinedAtom::permRWX); + io.enumCase(value, "rw-l", lld::DefinedAtom::permRW_L); + io.enumCase(value, "unknown", lld::DefinedAtom::permUnknown); + } +}; + +template <> struct ScalarEnumerationTraits<lld::DefinedAtom::ContentType> { + static void enumeration(IO &io, lld::DefinedAtom::ContentType &value) { + io.enumCase(value, "unknown", DefinedAtom::typeUnknown); + io.enumCase(value, "code", DefinedAtom::typeCode); + io.enumCase(value, "stub", DefinedAtom::typeStub); + 
io.enumCase(value, "constant", DefinedAtom::typeConstant); + io.enumCase(value, "data", DefinedAtom::typeData); + io.enumCase(value, "quick-data", DefinedAtom::typeDataFast); + io.enumCase(value, "zero-fill", DefinedAtom::typeZeroFill); + io.enumCase(value, "zero-fill-quick", DefinedAtom::typeZeroFillFast); + io.enumCase(value, "const-data", DefinedAtom::typeConstData); + io.enumCase(value, "got", DefinedAtom::typeGOT); + io.enumCase(value, "resolver", DefinedAtom::typeResolver); + io.enumCase(value, "branch-island", DefinedAtom::typeBranchIsland); + io.enumCase(value, "branch-shim", DefinedAtom::typeBranchShim); + io.enumCase(value, "stub-helper", DefinedAtom::typeStubHelper); + io.enumCase(value, "c-string", DefinedAtom::typeCString); + io.enumCase(value, "utf16-string", DefinedAtom::typeUTF16String); + io.enumCase(value, "unwind-cfi", DefinedAtom::typeCFI); + io.enumCase(value, "unwind-lsda", DefinedAtom::typeLSDA); + io.enumCase(value, "const-4-byte", DefinedAtom::typeLiteral4); + io.enumCase(value, "const-8-byte", DefinedAtom::typeLiteral8); + io.enumCase(value, "const-16-byte", DefinedAtom::typeLiteral16); + io.enumCase(value, "lazy-pointer", DefinedAtom::typeLazyPointer); + io.enumCase(value, "lazy-dylib-pointer", + DefinedAtom::typeLazyDylibPointer); + io.enumCase(value, "cfstring", DefinedAtom::typeCFString); + io.enumCase(value, "initializer-pointer", + DefinedAtom::typeInitializerPtr); + io.enumCase(value, "terminator-pointer", + DefinedAtom::typeTerminatorPtr); + io.enumCase(value, "c-string-pointer",DefinedAtom::typeCStringPtr); + io.enumCase(value, "objc-class-pointer", + DefinedAtom::typeObjCClassPtr); + io.enumCase(value, "objc-category-list", + DefinedAtom::typeObjC2CategoryList); + io.enumCase(value, "objc-image-info", + DefinedAtom::typeObjCImageInfo); + io.enumCase(value, "objc-method-list", + DefinedAtom::typeObjCMethodList); + io.enumCase(value, "objc-class1", DefinedAtom::typeObjC1Class); + io.enumCase(value, "dtraceDOF", 
DefinedAtom::typeDTraceDOF); + io.enumCase(value, "interposing-tuples", + DefinedAtom::typeInterposingTuples); + io.enumCase(value, "lto-temp", DefinedAtom::typeTempLTO); + io.enumCase(value, "compact-unwind", DefinedAtom::typeCompactUnwindInfo); + io.enumCase(value, "unwind-info", DefinedAtom::typeProcessedUnwindInfo); + io.enumCase(value, "tlv-thunk", DefinedAtom::typeThunkTLV); + io.enumCase(value, "tlv-data", DefinedAtom::typeTLVInitialData); + io.enumCase(value, "tlv-zero-fill", DefinedAtom::typeTLVInitialZeroFill); + io.enumCase(value, "tlv-initializer-ptr", + DefinedAtom::typeTLVInitializerPtr); + io.enumCase(value, "mach_header", DefinedAtom::typeMachHeader); + io.enumCase(value, "dso_handle", DefinedAtom::typeDSOHandle); + io.enumCase(value, "sectcreate", DefinedAtom::typeSectCreate); + } +}; + +template <> struct ScalarEnumerationTraits<lld::UndefinedAtom::CanBeNull> { + static void enumeration(IO &io, lld::UndefinedAtom::CanBeNull &value) { + io.enumCase(value, "never", lld::UndefinedAtom::canBeNullNever); + io.enumCase(value, "at-runtime", lld::UndefinedAtom::canBeNullAtRuntime); + io.enumCase(value, "at-buildtime",lld::UndefinedAtom::canBeNullAtBuildtime); + } +}; + +template <> struct ScalarEnumerationTraits<ShlibCanBeNull> { + static void enumeration(IO &io, ShlibCanBeNull &value) { + io.enumCase(value, "never", false); + io.enumCase(value, "at-runtime", true); + } +}; + +template <> +struct ScalarEnumerationTraits<lld::SharedLibraryAtom::Type> { + static void enumeration(IO &io, lld::SharedLibraryAtom::Type &value) { + io.enumCase(value, "code", lld::SharedLibraryAtom::Type::Code); + io.enumCase(value, "data", lld::SharedLibraryAtom::Type::Data); + io.enumCase(value, "unknown", lld::SharedLibraryAtom::Type::Unknown); + } +}; + +/// This is a custom formatter for lld::DefinedAtom::Alignment. 
Values look +/// like: +/// 8 # 8-byte aligned +/// 7 mod 16 # 16-byte aligned plus 7 bytes +template <> struct ScalarTraits<lld::DefinedAtom::Alignment> { + static void output(const lld::DefinedAtom::Alignment &value, void *ctxt, + raw_ostream &out) { + if (value.modulus == 0) { + out << llvm::format("%d", value.value); + } else { + out << llvm::format("%d mod %d", value.modulus, value.value); + } + } + + static StringRef input(StringRef scalar, void *ctxt, + lld::DefinedAtom::Alignment &value) { + value.modulus = 0; + size_t modStart = scalar.find("mod"); + if (modStart != StringRef::npos) { + StringRef modStr = scalar.slice(0, modStart); + modStr = modStr.rtrim(); + unsigned int modulus; + if (modStr.getAsInteger(0, modulus)) { + return "malformed alignment modulus"; + } + value.modulus = modulus; + scalar = scalar.drop_front(modStart + 3); + scalar = scalar.ltrim(); + } + unsigned int power; + if (scalar.getAsInteger(0, power)) { + return "malformed alignment power"; + } + value.value = power; + if (value.modulus >= power) { + return "malformed alignment, modulus too large for power"; + } + return StringRef(); // returning empty string means success + } + + static bool mustQuote(StringRef) { return false; } +}; + +template <> struct ScalarEnumerationTraits<FileKinds> { + static void enumeration(IO &io, FileKinds &value) { + io.enumCase(value, "object", fileKindObjectAtoms); + io.enumCase(value, "archive", fileKindArchive); + io.enumCase(value, "object-mach-o", fileKindObjectMachO); + } +}; + +template <> struct MappingTraits<ArchMember> { + static void mapping(IO &io, ArchMember &member) { + io.mapOptional("kind", member._kind, fileKindObjectAtoms); + io.mapOptional("name", member._name); + io.mapRequired("content", member._content); + } +}; + +// Declare that an AtomList is a yaml sequence. 
+template <typename T> struct SequenceTraits<AtomList<T> > { + static size_t size(IO &io, AtomList<T> &seq) { return seq._atoms.size(); } + static T *&element(IO &io, AtomList<T> &seq, size_t index) { + if (index >= seq._atoms.size()) + seq._atoms.resize(index + 1); + return seq._atoms[index].get(); + } +}; + +// Declare that an AtomRange is a yaml sequence. +template <typename T> struct SequenceTraits<File::AtomRange<T> > { + static size_t size(IO &io, File::AtomRange<T> &seq) { return seq.size(); } + static T *&element(IO &io, File::AtomRange<T> &seq, size_t index) { + assert(io.outputting() && "AtomRange only used when outputting"); + assert(index < seq.size() && "Out of range access"); + return seq[index].get(); + } +}; + +// Used to allow DefinedAtom content bytes to be a flow sequence of +// two-digit hex numbers without the leading 0x (e.g. FF, 04, 0A) +template <> struct ScalarTraits<ImplicitHex8> { + static void output(const ImplicitHex8 &val, void *, raw_ostream &out) { + uint8_t num = val; + out << llvm::format("%02X", num); + } + + static StringRef input(StringRef str, void *, ImplicitHex8 &val) { + unsigned long long n; + if (getAsUnsignedInteger(str, 16, n)) + return "invalid two-digit-hex number"; + if (n > 0xFF) + return "out of range two-digit-hex number"; + val = n; + return StringRef(); // returning empty string means success + } + + static bool mustQuote(StringRef) { return false; } +}; + +// YAML conversion for std::vector<const lld::File*> +template <> struct DocumentListTraits<std::vector<const lld::File *> > { + static size_t size(IO &io, std::vector<const lld::File *> &seq) { + return seq.size(); + } + static const lld::File *&element(IO &io, std::vector<const lld::File *> &seq, + size_t index) { + if (index >= seq.size()) + seq.resize(index + 1); + return seq[index]; + } +}; + +// YAML conversion for const lld::File* +template <> struct MappingTraits<const lld::File *> { + + class NormArchiveFile : public lld::ArchiveLibraryFile { + 
public: + NormArchiveFile(IO &io) : ArchiveLibraryFile(""), _path() {} + NormArchiveFile(IO &io, const lld::File *file) + : ArchiveLibraryFile(file->path()), _path(file->path()) { + // If we want to support writing archives, this constructor would + // need to populate _members. + } + + const lld::File *denormalize(IO &io) { return this; } + + const AtomRange<lld::DefinedAtom> defined() const override { + return _noDefinedAtoms; + } + + const AtomRange<lld::UndefinedAtom> undefined() const override { + return _noUndefinedAtoms; + } + + const AtomRange<lld::SharedLibraryAtom> sharedLibrary() const override { + return _noSharedLibraryAtoms; + } + + const AtomRange<lld::AbsoluteAtom> absolute() const override { + return _noAbsoluteAtoms; + } + + void clearAtoms() override { + _noDefinedAtoms.clear(); + _noUndefinedAtoms.clear(); + _noSharedLibraryAtoms.clear(); + _noAbsoluteAtoms.clear(); + } + + File *find(StringRef name) override { + for (const ArchMember &member : _members) + for (const lld::DefinedAtom *atom : member._content->defined()) + if (name == atom->name()) + return const_cast<File *>(member._content); + return nullptr; + } + + std::error_code + parseAllMembers(std::vector<std::unique_ptr<File>> &result) override { + return std::error_code(); + } + + StringRef _path; + std::vector<ArchMember> _members; + }; + + class NormalizedFile : public lld::File { + public: + NormalizedFile(IO &io) + : File("", kindNormalizedObject), _io(io), _rnb(nullptr), + _definedAtomsRef(_definedAtoms._atoms), + _undefinedAtomsRef(_undefinedAtoms._atoms), + _sharedLibraryAtomsRef(_sharedLibraryAtoms._atoms), + _absoluteAtomsRef(_absoluteAtoms._atoms) {} + NormalizedFile(IO &io, const lld::File *file) + : File(file->path(), kindNormalizedObject), _io(io), + _rnb(new RefNameBuilder(*file)), _path(file->path()), + _definedAtomsRef(file->defined()), + _undefinedAtomsRef(file->undefined()), + _sharedLibraryAtomsRef(file->sharedLibrary()), + _absoluteAtomsRef(file->absolute()) { + } + 
+ + ~NormalizedFile() override { + } + + const lld::File *denormalize(IO &io); + + const AtomRange<lld::DefinedAtom> defined() const override { + return _definedAtomsRef; + } + + const AtomRange<lld::UndefinedAtom> undefined() const override { + return _undefinedAtomsRef; + } + + const AtomRange<lld::SharedLibraryAtom> sharedLibrary() const override { + return _sharedLibraryAtomsRef; + } + + const AtomRange<lld::AbsoluteAtom> absolute() const override { + return _absoluteAtomsRef; + } + + void clearAtoms() override { + _definedAtoms._atoms.clear(); + _undefinedAtoms._atoms.clear(); + _sharedLibraryAtoms._atoms.clear(); + _absoluteAtoms._atoms.clear(); + } + + // Allocate a new copy of this string in _storage, so the strings + // can be freed when File is destroyed. + StringRef copyString(StringRef str) { + char *s = _storage.Allocate<char>(str.size()); + memcpy(s, str.data(), str.size()); + return StringRef(s, str.size()); + } + + IO &_io; + std::unique_ptr<RefNameBuilder> _rnb; + StringRef _path; + AtomList<lld::DefinedAtom> _definedAtoms; + AtomList<lld::UndefinedAtom> _undefinedAtoms; + AtomList<lld::SharedLibraryAtom> _sharedLibraryAtoms; + AtomList<lld::AbsoluteAtom> _absoluteAtoms; + AtomRange<lld::DefinedAtom> _definedAtomsRef; + AtomRange<lld::UndefinedAtom> _undefinedAtomsRef; + AtomRange<lld::SharedLibraryAtom> _sharedLibraryAtomsRef; + AtomRange<lld::AbsoluteAtom> _absoluteAtomsRef; + llvm::BumpPtrAllocator _storage; + }; + + static void mapping(IO &io, const lld::File *&file) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + // Let any registered tag handler process this. + if (info->_registry && info->_registry->handleTaggedDoc(io, file)) + return; + // If no registered handler claims this tag and there is no tag, + // grandfather in as "!native". 
+ if (io.mapTag("!native", true) || io.mapTag("tag:yaml.org,2002:map")) + mappingAtoms(io, file); + } + + static void mappingAtoms(IO &io, const lld::File *&file) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + MappingNormalizationHeap<NormalizedFile, const lld::File *> + keys(io, file, nullptr); + assert(info != nullptr); + info->_file = keys.operator->(); + + io.mapOptional("path", keys->_path); + + if (io.outputting()) { + io.mapOptional("defined-atoms", keys->_definedAtomsRef); + io.mapOptional("undefined-atoms", keys->_undefinedAtomsRef); + io.mapOptional("shared-library-atoms", keys->_sharedLibraryAtomsRef); + io.mapOptional("absolute-atoms", keys->_absoluteAtomsRef); + } else { + io.mapOptional("defined-atoms", keys->_definedAtoms); + io.mapOptional("undefined-atoms", keys->_undefinedAtoms); + io.mapOptional("shared-library-atoms", keys->_sharedLibraryAtoms); + io.mapOptional("absolute-atoms", keys->_absoluteAtoms); + } + } + + static void mappingArchive(IO &io, const lld::File *&file) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + MappingNormalizationHeap<NormArchiveFile, const lld::File *> + keys(io, file, &info->_file->allocator()); + + io.mapOptional("path", keys->_path); + io.mapOptional("members", keys->_members); + } +}; + +// YAML conversion for const lld::Reference* +template <> struct MappingTraits<const lld::Reference *> { + + class NormalizedReference : public lld::Reference { + public: + NormalizedReference(IO &io) + : lld::Reference(lld::Reference::KindNamespace::all, + lld::Reference::KindArch::all, 0), + _target(nullptr), _targetName(), _offset(0), _addend(0), _tag(0) {} + + NormalizedReference(IO &io, const lld::Reference *ref) + : lld::Reference(ref->kindNamespace(), ref->kindArch(), + ref->kindValue()), + _target(nullptr), _targetName(targetName(io, ref)), + _offset(ref->offsetInAtom()), _addend(ref->addend()), + _tag(ref->tag()) { + _mappedKind.ns = ref->kindNamespace(); + 
_mappedKind.arch = ref->kindArch(); + _mappedKind.value = ref->kindValue(); + } + + const lld::Reference *denormalize(IO &io) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + typedef MappingTraits<const lld::File *>::NormalizedFile NormalizedFile; + NormalizedFile *f = reinterpret_cast<NormalizedFile *>(info->_file); + if (!_targetName.empty()) + _targetName = f->copyString(_targetName); + DEBUG_WITH_TYPE("WriterYAML", llvm::dbgs() + << "created Reference to name: '" + << _targetName << "' (" + << (const void *)_targetName.data() + << ", " << _targetName.size() << ")\n"); + setKindNamespace(_mappedKind.ns); + setKindArch(_mappedKind.arch); + setKindValue(_mappedKind.value); + return this; + } + void bind(const RefNameResolver &); + static StringRef targetName(IO &io, const lld::Reference *ref); + + uint64_t offsetInAtom() const override { return _offset; } + const lld::Atom *target() const override { return _target; } + Addend addend() const override { return _addend; } + void setAddend(Addend a) override { _addend = a; } + void setTarget(const lld::Atom *a) override { _target = a; } + + const lld::Atom *_target; + StringRef _targetName; + uint32_t _offset; + Addend _addend; + RefKind _mappedKind; + uint32_t _tag; + }; + + static void mapping(IO &io, const lld::Reference *&ref) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + MappingNormalizationHeap<NormalizedReference, const lld::Reference *> keys( + io, ref, &info->_file->allocator()); + + io.mapRequired("kind", keys->_mappedKind); + io.mapOptional("offset", keys->_offset); + io.mapOptional("target", keys->_targetName); + io.mapOptional("addend", keys->_addend, (lld::Reference::Addend)0); + io.mapOptional("tag", keys->_tag, 0u); + } +}; + +// YAML conversion for const lld::DefinedAtom* +template <> struct MappingTraits<const lld::DefinedAtom *> { + + class NormalizedAtom : public lld::DefinedAtom { + public: + NormalizedAtom(IO &io) 
+ : _file(fileFromContext(io)), _name(), _refName(), _contentType(), + _alignment(1), _content(), _references() { + static uint32_t ordinalCounter = 1; + _ordinal = ordinalCounter++; + } + NormalizedAtom(IO &io, const lld::DefinedAtom *atom) + : _file(fileFromContext(io)), _name(atom->name()), _refName(), + _scope(atom->scope()), _interpose(atom->interposable()), + _merge(atom->merge()), _contentType(atom->contentType()), + _alignment(atom->alignment()), _sectionChoice(atom->sectionChoice()), + _deadStrip(atom->deadStrip()), _dynamicExport(atom->dynamicExport()), + _codeModel(atom->codeModel()), + _permissions(atom->permissions()), _size(atom->size()), + _sectionName(atom->customSectionName()), + _sectionSize(atom->sectionSize()) { + for (const lld::Reference *r : *atom) + _references.push_back(r); + if (!atom->occupiesDiskSpace()) + return; + ArrayRef<uint8_t> cont = atom->rawContent(); + _content.reserve(cont.size()); + for (uint8_t x : cont) + _content.push_back(x); + } + + ~NormalizedAtom() override = default; + + const lld::DefinedAtom *denormalize(IO &io) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + typedef MappingTraits<const lld::File *>::NormalizedFile NormalizedFile; + NormalizedFile *f = reinterpret_cast<NormalizedFile *>(info->_file); + if (!_name.empty()) + _name = f->copyString(_name); + if (!_refName.empty()) + _refName = f->copyString(_refName); + if (!_sectionName.empty()) + _sectionName = f->copyString(_sectionName); + DEBUG_WITH_TYPE("WriterYAML", + llvm::dbgs() << "created DefinedAtom named: '" << _name + << "' (" << (const void *)_name.data() + << ", " << _name.size() << ")\n"); + return this; + } + + void bind(const RefNameResolver &); + + // Extract current File object from YAML I/O parsing context + const lld::File &fileFromContext(IO &io) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + assert(info->_file != nullptr); + return 
*info->_file; + } + + const lld::File &file() const override { return _file; } + StringRef name() const override { return _name; } + uint64_t size() const override { return _size; } + Scope scope() const override { return _scope; } + Interposable interposable() const override { return _interpose; } + Merge merge() const override { return _merge; } + ContentType contentType() const override { return _contentType; } + Alignment alignment() const override { return _alignment; } + SectionChoice sectionChoice() const override { return _sectionChoice; } + StringRef customSectionName() const override { return _sectionName; } + uint64_t sectionSize() const override { return _sectionSize; } + DeadStripKind deadStrip() const override { return _deadStrip; } + DynamicExport dynamicExport() const override { return _dynamicExport; } + CodeModel codeModel() const override { return _codeModel; } + ContentPermissions permissions() const override { return _permissions; } + ArrayRef<uint8_t> rawContent() const override { + if (!occupiesDiskSpace()) + return ArrayRef<uint8_t>(); + return ArrayRef<uint8_t>( + reinterpret_cast<const uint8_t *>(_content.data()), _content.size()); + } + + uint64_t ordinal() const override { return _ordinal; } + + reference_iterator begin() const override { + uintptr_t index = 0; + const void *it = reinterpret_cast<const void *>(index); + return reference_iterator(*this, it); + } + reference_iterator end() const override { + uintptr_t index = _references.size(); + const void *it = reinterpret_cast<const void *>(index); + return reference_iterator(*this, it); + } + const lld::Reference *derefIterator(const void *it) const override { + uintptr_t index = reinterpret_cast<uintptr_t>(it); + assert(index < _references.size()); + return _references[index]; + } + void incrementIterator(const void *&it) const override { + uintptr_t index = reinterpret_cast<uintptr_t>(it); + ++index; + it = reinterpret_cast<const void *>(index); + } + + void 
addReference(Reference::KindNamespace ns, + Reference::KindArch arch, + Reference::KindValue kindValue, uint64_t off, + const Atom *target, Reference::Addend a) override { + assert(target && "trying to create reference to nothing"); + auto node = new (file().allocator()) SimpleReference(ns, arch, kindValue, + off, target, a); + _references.push_back(node); + } + + const lld::File &_file; + StringRef _name; + StringRef _refName; + Scope _scope; + Interposable _interpose; + Merge _merge; + ContentType _contentType; + Alignment _alignment; + SectionChoice _sectionChoice; + DeadStripKind _deadStrip; + DynamicExport _dynamicExport; + CodeModel _codeModel; + ContentPermissions _permissions; + uint32_t _ordinal; + std::vector<ImplicitHex8> _content; + uint64_t _size; + StringRef _sectionName; + uint64_t _sectionSize; + std::vector<const lld::Reference *> _references; + }; + + static void mapping(IO &io, const lld::DefinedAtom *&atom) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + MappingNormalizationHeap<NormalizedAtom, const lld::DefinedAtom *> keys( + io, atom, &info->_file->allocator()); + if (io.outputting()) { + // If writing YAML, check if atom needs a ref-name. 
+ typedef MappingTraits<const lld::File *>::NormalizedFile NormalizedFile; + assert(info != nullptr); + NormalizedFile *f = reinterpret_cast<NormalizedFile *>(info->_file); + assert(f); + assert(f->_rnb); + if (f->_rnb->hasRefName(atom)) { + keys->_refName = f->_rnb->refName(atom); + } + } + + io.mapOptional("name", keys->_name, StringRef()); + io.mapOptional("ref-name", keys->_refName, StringRef()); + io.mapOptional("scope", keys->_scope, + DefinedAtom::scopeTranslationUnit); + io.mapOptional("type", keys->_contentType, + DefinedAtom::typeCode); + io.mapOptional("content", keys->_content); + io.mapOptional("size", keys->_size, (uint64_t)keys->_content.size()); + io.mapOptional("interposable", keys->_interpose, + DefinedAtom::interposeNo); + io.mapOptional("merge", keys->_merge, DefinedAtom::mergeNo); + io.mapOptional("alignment", keys->_alignment, + DefinedAtom::Alignment(1)); + io.mapOptional("section-choice", keys->_sectionChoice, + DefinedAtom::sectionBasedOnContent); + io.mapOptional("section-name", keys->_sectionName, StringRef()); + io.mapOptional("section-size", keys->_sectionSize, (uint64_t)0); + io.mapOptional("dead-strip", keys->_deadStrip, + DefinedAtom::deadStripNormal); + io.mapOptional("dynamic-export", keys->_dynamicExport, + DefinedAtom::dynamicExportNormal); + io.mapOptional("code-model", keys->_codeModel, DefinedAtom::codeNA); + // default permissions based on content type + io.mapOptional("permissions", keys->_permissions, + DefinedAtom::permissions( + keys->_contentType)); + io.mapOptional("references", keys->_references); + } +}; + +template <> struct MappingTraits<lld::DefinedAtom *> { + static void mapping(IO &io, lld::DefinedAtom *&atom) { + const lld::DefinedAtom *atomPtr = atom; + MappingTraits<const lld::DefinedAtom *>::mapping(io, atomPtr); + atom = const_cast<lld::DefinedAtom *>(atomPtr); + } +}; + +// YAML conversion for const lld::UndefinedAtom* +template <> struct MappingTraits<const lld::UndefinedAtom *> { + + class NormalizedAtom 
: public lld::UndefinedAtom { + public: + NormalizedAtom(IO &io) + : _file(fileFromContext(io)), _name(), _canBeNull(canBeNullNever) {} + + NormalizedAtom(IO &io, const lld::UndefinedAtom *atom) + : _file(fileFromContext(io)), _name(atom->name()), + _canBeNull(atom->canBeNull()) {} + + ~NormalizedAtom() override = default; + + const lld::UndefinedAtom *denormalize(IO &io) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + typedef MappingTraits<const lld::File *>::NormalizedFile NormalizedFile; + NormalizedFile *f = reinterpret_cast<NormalizedFile *>(info->_file); + if (!_name.empty()) + _name = f->copyString(_name); + + DEBUG_WITH_TYPE("WriterYAML", + llvm::dbgs() << "created UndefinedAtom named: '" << _name + << "' (" << (const void *)_name.data() << ", " + << _name.size() << ")\n"); + return this; + } + + // Extract current File object from YAML I/O parsing context + const lld::File &fileFromContext(IO &io) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + assert(info->_file != nullptr); + return *info->_file; + } + + const lld::File &file() const override { return _file; } + StringRef name() const override { return _name; } + CanBeNull canBeNull() const override { return _canBeNull; } + + const lld::File &_file; + StringRef _name; + CanBeNull _canBeNull; + }; + + static void mapping(IO &io, const lld::UndefinedAtom *&atom) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + MappingNormalizationHeap<NormalizedAtom, const lld::UndefinedAtom *> keys( + io, atom, &info->_file->allocator()); + + io.mapRequired("name", keys->_name); + io.mapOptional("can-be-null", keys->_canBeNull, + lld::UndefinedAtom::canBeNullNever); + } +}; + +template <> struct MappingTraits<lld::UndefinedAtom *> { + static void mapping(IO &io, lld::UndefinedAtom *&atom) { + const lld::UndefinedAtom *atomPtr = atom; + MappingTraits<const lld::UndefinedAtom 
*>::mapping(io, atomPtr); + atom = const_cast<lld::UndefinedAtom *>(atomPtr); + } +}; + +// YAML conversion for const lld::SharedLibraryAtom* +template <> struct MappingTraits<const lld::SharedLibraryAtom *> { + class NormalizedAtom : public lld::SharedLibraryAtom { + public: + NormalizedAtom(IO &io) + : _file(fileFromContext(io)), _name(), _loadName(), _canBeNull(false), + _type(Type::Unknown), _size(0) {} + NormalizedAtom(IO &io, const lld::SharedLibraryAtom *atom) + : _file(fileFromContext(io)), _name(atom->name()), + _loadName(atom->loadName()), _canBeNull(atom->canBeNullAtRuntime()), + _type(atom->type()), _size(atom->size()) {} + + ~NormalizedAtom() override = default; + + const lld::SharedLibraryAtom *denormalize(IO &io) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + typedef MappingTraits<const lld::File *>::NormalizedFile NormalizedFile; + NormalizedFile *f = reinterpret_cast<NormalizedFile *>(info->_file); + if (!_name.empty()) + _name = f->copyString(_name); + if (!_loadName.empty()) + _loadName = f->copyString(_loadName); + + DEBUG_WITH_TYPE("WriterYAML", + llvm::dbgs() << "created SharedLibraryAtom named: '" + << _name << "' (" + << (const void *)_name.data() + << ", " << _name.size() << ")\n"); + return this; + } + + // Extract current File object from YAML I/O parsing context + const lld::File &fileFromContext(IO &io) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + assert(info->_file != nullptr); + return *info->_file; + } + + const lld::File &file() const override { return _file; } + StringRef name() const override { return _name; } + StringRef loadName() const override { return _loadName; } + bool canBeNullAtRuntime() const override { return _canBeNull; } + Type type() const override { return _type; } + uint64_t size() const override { return _size; } + + const lld::File &_file; + StringRef _name; + StringRef _loadName; + ShlibCanBeNull 
_canBeNull; + Type _type; + uint64_t _size; + }; + + static void mapping(IO &io, const lld::SharedLibraryAtom *&atom) { + + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + MappingNormalizationHeap<NormalizedAtom, const lld::SharedLibraryAtom *> + keys(io, atom, &info->_file->allocator()); + + io.mapRequired("name", keys->_name); + io.mapOptional("load-name", keys->_loadName); + io.mapOptional("can-be-null", keys->_canBeNull, (ShlibCanBeNull) false); + io.mapOptional("type", keys->_type, SharedLibraryAtom::Type::Code); + io.mapOptional("size", keys->_size, uint64_t(0)); + } +}; + +template <> struct MappingTraits<lld::SharedLibraryAtom *> { + static void mapping(IO &io, lld::SharedLibraryAtom *&atom) { + const lld::SharedLibraryAtom *atomPtr = atom; + MappingTraits<const lld::SharedLibraryAtom *>::mapping(io, atomPtr); + atom = const_cast<lld::SharedLibraryAtom *>(atomPtr); + } +}; + +// YAML conversion for const lld::AbsoluteAtom* +template <> struct MappingTraits<const lld::AbsoluteAtom *> { + + class NormalizedAtom : public lld::AbsoluteAtom { + public: + NormalizedAtom(IO &io) + : _file(fileFromContext(io)), _name(), _scope(), _value(0) {} + NormalizedAtom(IO &io, const lld::AbsoluteAtom *atom) + : _file(fileFromContext(io)), _name(atom->name()), + _scope(atom->scope()), _value(atom->value()) {} + + ~NormalizedAtom() override = default; + + const lld::AbsoluteAtom *denormalize(IO &io) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + typedef MappingTraits<const lld::File *>::NormalizedFile NormalizedFile; + NormalizedFile *f = reinterpret_cast<NormalizedFile *>(info->_file); + if (!_name.empty()) + _name = f->copyString(_name); + + DEBUG_WITH_TYPE("WriterYAML", + llvm::dbgs() << "created AbsoluteAtom named: '" << _name + << "' (" << (const void *)_name.data() + << ", " << _name.size() << ")\n"); + return this; + } + // Extract current File object from YAML I/O parsing context + const 
lld::File &fileFromContext(IO &io) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + assert(info->_file != nullptr); + return *info->_file; + } + + const lld::File &file() const override { return _file; } + StringRef name() const override { return _name; } + uint64_t value() const override { return _value; } + Scope scope() const override { return _scope; } + + const lld::File &_file; + StringRef _name; + StringRef _refName; + Scope _scope; + Hex64 _value; + }; + + static void mapping(IO &io, const lld::AbsoluteAtom *&atom) { + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + MappingNormalizationHeap<NormalizedAtom, const lld::AbsoluteAtom *> keys( + io, atom, &info->_file->allocator()); + + if (io.outputting()) { + typedef MappingTraits<const lld::File *>::NormalizedFile NormalizedFile; + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + NormalizedFile *f = reinterpret_cast<NormalizedFile *>(info->_file); + assert(f); + assert(f->_rnb); + if (f->_rnb->hasRefName(atom)) { + keys->_refName = f->_rnb->refName(atom); + } + } + + io.mapRequired("name", keys->_name); + io.mapOptional("ref-name", keys->_refName, StringRef()); + io.mapOptional("scope", keys->_scope); + io.mapRequired("value", keys->_value); + } +}; + +template <> struct MappingTraits<lld::AbsoluteAtom *> { + static void mapping(IO &io, lld::AbsoluteAtom *&atom) { + const lld::AbsoluteAtom *atomPtr = atom; + MappingTraits<const lld::AbsoluteAtom *>::mapping(io, atomPtr); + atom = const_cast<lld::AbsoluteAtom *>(atomPtr); + } +}; + +} // end namespace llvm +} // end namespace yaml + +RefNameResolver::RefNameResolver(const lld::File *file, IO &io) : _io(io) { + typedef MappingTraits<const lld::DefinedAtom *>::NormalizedAtom + NormalizedAtom; + for (const lld::DefinedAtom *a : file->defined()) { + const auto *na = (const NormalizedAtom *)a; + if (!na->_refName.empty()) + 
add(na->_refName, a); + else if (!na->_name.empty()) + add(na->_name, a); + } + + for (const lld::UndefinedAtom *a : file->undefined()) + add(a->name(), a); + + for (const lld::SharedLibraryAtom *a : file->sharedLibrary()) + add(a->name(), a); + + typedef MappingTraits<const lld::AbsoluteAtom *>::NormalizedAtom NormAbsAtom; + for (const lld::AbsoluteAtom *a : file->absolute()) { + const auto *na = (const NormAbsAtom *)a; + if (na->_refName.empty()) + add(na->_name, a); + else + add(na->_refName, a); + } +} + +inline const lld::File * +MappingTraits<const lld::File *>::NormalizedFile::denormalize(IO &io) { + typedef MappingTraits<const lld::DefinedAtom *>::NormalizedAtom + NormalizedAtom; + + RefNameResolver nameResolver(this, io); + // Now that all atoms are parsed, references can be bound. + for (const lld::DefinedAtom *a : this->defined()) { + auto *normAtom = (NormalizedAtom *)const_cast<DefinedAtom *>(a); + normAtom->bind(nameResolver); + } + + return this; +} + +inline void MappingTraits<const lld::DefinedAtom *>::NormalizedAtom::bind( + const RefNameResolver &resolver) { + typedef MappingTraits<const lld::Reference *>::NormalizedReference + NormalizedReference; + for (const lld::Reference *ref : _references) { + auto *normRef = (NormalizedReference *)const_cast<Reference *>(ref); + normRef->bind(resolver); + } +} + +inline void MappingTraits<const lld::Reference *>::NormalizedReference::bind( + const RefNameResolver &resolver) { + _target = resolver.lookup(_targetName); +} + +inline StringRef +MappingTraits<const lld::Reference *>::NormalizedReference::targetName( + IO &io, const lld::Reference *ref) { + if (ref->target() == nullptr) + return StringRef(); + YamlContext *info = reinterpret_cast<YamlContext *>(io.getContext()); + assert(info != nullptr); + typedef MappingTraits<const lld::File *>::NormalizedFile NormalizedFile; + NormalizedFile *f = reinterpret_cast<NormalizedFile *>(info->_file); + RefNameBuilder &rnb = *f->_rnb; + if 
(rnb.hasRefName(ref->target())) + return rnb.refName(ref->target()); + return ref->target()->name(); +} + +namespace lld { +namespace yaml { + +class Writer : public lld::Writer { +public: + Writer(const LinkingContext &context) : _ctx(context) {} + + llvm::Error writeFile(const lld::File &file, StringRef outPath) override { + // Create stream to path. + std::error_code ec; + llvm::raw_fd_ostream out(outPath, ec, llvm::sys::fs::F_Text); + if (ec) + return llvm::errorCodeToError(ec); + + // Create yaml Output writer, using yaml options for context. + YamlContext yamlContext; + yamlContext._ctx = &_ctx; + yamlContext._registry = &_ctx.registry(); + llvm::yaml::Output yout(out, &yamlContext); + + // Write yaml output. + const lld::File *fileRef = &file; + yout << fileRef; + + return llvm::Error(); + } + +private: + const LinkingContext &_ctx; +}; + +} // end namespace yaml + +namespace { + +/// Handles !native tagged yaml documents. +class NativeYamlIOTaggedDocumentHandler : public YamlIOTaggedDocumentHandler { + bool handledDocTag(llvm::yaml::IO &io, const lld::File *&file) const override { + if (io.mapTag("!native")) { + MappingTraits<const lld::File *>::mappingAtoms(io, file); + return true; + } + return false; + } +}; + +/// Handles !archive tagged yaml documents. 
+class ArchiveYamlIOTaggedDocumentHandler : public YamlIOTaggedDocumentHandler { + bool handledDocTag(llvm::yaml::IO &io, const lld::File *&file) const override { + if (io.mapTag("!archive")) { + MappingTraits<const lld::File *>::mappingArchive(io, file); + return true; + } + return false; + } +}; + +class YAMLReader : public Reader { +public: + YAMLReader(const Registry ®istry) : _registry(registry) {} + + bool canParse(file_magic magic, MemoryBufferRef mb) const override { + StringRef name = mb.getBufferIdentifier(); + return name.endswith(".objtxt") || name.endswith(".yaml"); + } + + ErrorOr<std::unique_ptr<File>> + loadFile(std::unique_ptr<MemoryBuffer> mb, + const class Registry &) const override { + // Create YAML Input Reader. + YamlContext yamlContext; + yamlContext._registry = &_registry; + yamlContext._path = mb->getBufferIdentifier(); + llvm::yaml::Input yin(mb->getBuffer(), &yamlContext); + + // Fill vector with File objects created by parsing yaml. + std::vector<const lld::File *> createdFiles; + yin >> createdFiles; + assert(createdFiles.size() == 1); + + // Error out now if there were parsing errors. 
+ if (yin.error()) + return make_error_code(lld::YamlReaderError::illegal_value); + + std::shared_ptr<MemoryBuffer> smb(mb.release()); + const File *file = createdFiles[0]; + // Note: loadFile() should return vector of *const* File + File *f = const_cast<File *>(file); + f->setLastError(std::error_code()); + f->setSharedMemoryBuffer(smb); + return std::unique_ptr<File>(f); + } + +private: + const Registry &_registry; +}; + +} // end anonymous namespace + +void Registry::addSupportYamlFiles() { + add(std::unique_ptr<Reader>(new YAMLReader(*this))); + add(std::unique_ptr<YamlIOTaggedDocumentHandler>( + new NativeYamlIOTaggedDocumentHandler())); + add(std::unique_ptr<YamlIOTaggedDocumentHandler>( + new ArchiveYamlIOTaggedDocumentHandler())); +} + +std::unique_ptr<Writer> createWriterYAML(const LinkingContext &context) { + return std::unique_ptr<Writer>(new lld::yaml::Writer(context)); +} + +} // end namespace lld |
