1 files changed, 837 insertions, 0 deletions
diff --git a/gnu/llvm/clang/unittests/Tooling/Syntax/TokensTest.cpp b/gnu/llvm/clang/unittests/Tooling/Syntax/TokensTest.cpp
new file mode 100644
index 00000000000..b2ad3859104
--- /dev/null
+++ b/gnu/llvm/clang/unittests/Tooling/Syntax/TokensTest.cpp
@@ -0,0 +1,837 @@
+//===- TokensTest.cpp -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/Syntax/Tokens.h"
+#include "clang/AST/ASTConsumer.h"
+#include "clang/AST/Expr.h"
+#include "clang/Basic/Diagnostic.h"
+#include "clang/Basic/DiagnosticIDs.h"
+#include "clang/Basic/DiagnosticOptions.h"
+#include "clang/Basic/FileManager.h"
+#include "clang/Basic/FileSystemOptions.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/LangOptions.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Basic/TokenKinds.def"
+#include "clang/Basic/TokenKinds.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/FrontendAction.h"
+#include "clang/Frontend/Utils.h"
+#include "clang/Lex/Lexer.h"
+#include "clang/Lex/PreprocessorOptions.h"
+#include "clang/Lex/Token.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/Support/raw_os_ostream.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Testing/Support/Annotations.h"
+#include "llvm/Testing/Support/SupportHelpers.h"
+#include "gmock/gmock.h"
+#include <cassert>
+#include <cstdlib>
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+#include <memory>
+#include <ostream>
+#include <string>
+
+using namespace clang;
+using namespace clang::syntax;
+
+using llvm::ValueIs;
+using ::testing::AllOf;
+using ::testing::Contains;
+using ::testing::ElementsAre;
+using ::testing::Field;
+using ::testing::Matcher;
+using ::testing::Not;
+using ::testing::StartsWith;
+
+namespace {
+// Checks the passed ArrayRef<T> has the same begin() and end() iterators as the
+// argument.
+MATCHER_P(SameRange, A, "") {
+  return A.begin() == arg.begin() && A.end() == arg.end();
+}
+
+Matcher<TokenBuffer::Expansion>
+IsExpansion(Matcher<llvm::ArrayRef<syntax::Token>> Spelled,
+            Matcher<llvm::ArrayRef<syntax::Token>> Expanded) {
+  return AllOf(Field(&TokenBuffer::Expansion::Spelled, Spelled),
+               Field(&TokenBuffer::Expansion::Expanded, Expanded));
+}
+// Matchers for syntax::Token.
+MATCHER_P(Kind, K, "") { return arg.kind() == K; }
+MATCHER_P2(HasText, Text, SourceMgr, "") {
+  return arg.text(*SourceMgr) == Text;
+}
+/// Checks the start and end location of a token are equal to SourceRng.
+MATCHER_P(RangeIs, SourceRng, "") {
+  return arg.location() == SourceRng.first &&
+         arg.endLocation() == SourceRng.second;
+}
+
+class TokenCollectorTest : public ::testing::Test {
+public:
+  /// Run the clang frontend, collect the preprocessed tokens from the frontend
+  /// invocation and store them in this->Buffer.
+  /// This also clears SourceManager before running the compiler.
+  void recordTokens(llvm::StringRef Code) {
+    class RecordTokens : public ASTFrontendAction {
+    public:
+      explicit RecordTokens(TokenBuffer &Result) : Result(Result) {}
+
+      bool BeginSourceFileAction(CompilerInstance &CI) override {
+        assert(!Collector && "expected only a single call to BeginSourceFile");
+        Collector.emplace(CI.getPreprocessor());
+        return true;
+      }
+      void EndSourceFileAction() override {
+        assert(Collector && "BeginSourceFileAction was never called");
+        Result = std::move(*Collector).consume();
+      }
+
+      std::unique_ptr<ASTConsumer>
+      CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override {
+        return std::make_unique<ASTConsumer>();
+      }
+
+    private:
+      TokenBuffer &Result;
+      llvm::Optional<TokenCollector> Collector;
+    };
+
+    constexpr const char *FileName = "./input.cpp";
+    FS->addFile(FileName, time_t(), llvm::MemoryBuffer::getMemBufferCopy(""));
+    // Prepare to run a compiler.
+    if (!Diags->getClient())
+      Diags->setClient(new IgnoringDiagConsumer);
+    std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only",
+                                      FileName};
+    auto CI = createInvocationFromCommandLine(Args, Diags, FS);
+    assert(CI);
+    CI->getFrontendOpts().DisableFree = false;
+    CI->getPreprocessorOpts().addRemappedFile(
+        FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release());
+    CompilerInstance Compiler;
+    Compiler.setInvocation(std::move(CI));
+    Compiler.setDiagnostics(Diags.get());
+    Compiler.setFileManager(FileMgr.get());
+    Compiler.setSourceManager(SourceMgr.get());
+
+    this->Buffer = TokenBuffer(*SourceMgr);
+    RecordTokens Recorder(this->Buffer);
+    ASSERT_TRUE(Compiler.ExecuteAction(Recorder))
+        << "failed to run the frontend";
+  }
+
+  /// Record the tokens and return a test dump of the resulting buffer.
+  std::string collectAndDump(llvm::StringRef Code) {
+    recordTokens(Code);
+    return Buffer.dumpForTests();
+  }
+
+  // Adds a file to the test VFS.
+  void addFile(llvm::StringRef Path, llvm::StringRef Contents) {
+    if (!FS->addFile(Path, time_t(),
+                     llvm::MemoryBuffer::getMemBufferCopy(Contents))) {
+      ADD_FAILURE() << "could not add a file to VFS: " << Path;
+    }
+  }
+
+  /// Add a new file, run syntax::tokenize() on it and return the results.
+  std::vector<syntax::Token> tokenize(llvm::StringRef Text) {
+    // FIXME: pass proper LangOptions.
+    return syntax::tokenize(
+        SourceMgr->createFileID(llvm::MemoryBuffer::getMemBufferCopy(Text)),
+        *SourceMgr, LangOptions());
+  }
+
+  // Specialized versions of matchers that hide the SourceManager from clients.
+  Matcher<syntax::Token> HasText(std::string Text) const {
+    return ::HasText(Text, SourceMgr.get());
+  }
+  Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const {
+    std::pair<SourceLocation, SourceLocation> Ls;
+    Ls.first = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
+                   .getLocWithOffset(R.Begin);
+    Ls.second = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
+                    .getLocWithOffset(R.End);
+    return ::RangeIs(Ls);
+  }
+
+  /// Finds a subrange in O(n * m).
+  template <class T, class U, class Eq>
+  llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange,
+                                 llvm::ArrayRef<T> Range, Eq F) {
+    for (auto Begin = Range.begin(); Begin < Range.end(); ++Begin) {
+      auto It = Begin;
+      for (auto ItSub = Subrange.begin();
+           ItSub != Subrange.end() && It != Range.end(); ++ItSub, ++It) {
+        if (!F(*ItSub, *It))
+          goto continue_outer;
+      }
+      return llvm::makeArrayRef(Begin, It);
+    continue_outer:;
+    }
+    return llvm::makeArrayRef(Range.end(), Range.end());
+  }
+
+  /// Finds a subrange in \p Tokens that match the tokens specified in \p Query.
+  /// The match should be unique. \p Query is a whitespace-separated list of
+  /// tokens to search for.
+  llvm::ArrayRef<syntax::Token>
+  findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) {
+    llvm::SmallVector<llvm::StringRef, 8> QueryTokens;
+    Query.split(QueryTokens, ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
+    if (QueryTokens.empty()) {
+      ADD_FAILURE() << "will not look for an empty list of tokens";
+      std::abort();
+    }
+    // An equality test for search.
+    auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) {
+      return Q == T.text(*SourceMgr);
+    };
+    // Find a match.
+    auto Found =
+        findSubrange(llvm::makeArrayRef(QueryTokens), Tokens, TextMatches);
+    if (Found.begin() == Tokens.end()) {
+      ADD_FAILURE() << "could not find the subrange for " << Query;
+      std::abort();
+    }
+    // Check that the match is unique.
+    if (findSubrange(llvm::makeArrayRef(QueryTokens),
+                     llvm::makeArrayRef(Found.end(), Tokens.end()), TextMatches)
+            .begin() != Tokens.end()) {
+      ADD_FAILURE() << "match is not unique for " << Query;
+      std::abort();
+    }
+    return Found;
+  };
+
+  // Specialized versions of findTokenRange for expanded and spelled tokens.
+  llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) {
+    return findTokenRange(Query, Buffer.expandedTokens());
+  }
+  llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query,
+                                            FileID File = FileID()) {
+    if (!File.isValid())
+      File = SourceMgr->getMainFileID();
+    return findTokenRange(Query, Buffer.spelledTokens(File));
+  }
+
+  // Data fields.
+  llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
+      new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions);
+  IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS =
+      new llvm::vfs::InMemoryFileSystem;
+  llvm::IntrusiveRefCntPtr<FileManager> FileMgr =
+      new FileManager(FileSystemOptions(), FS);
+  llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr =
+      new SourceManager(*Diags, *FileMgr);
+  /// Contains last result of calling recordTokens().
+  TokenBuffer Buffer = TokenBuffer(*SourceMgr);
+};
+
+TEST_F(TokenCollectorTest, RawMode) {
+  EXPECT_THAT(tokenize("int main() {}"),
+              ElementsAre(Kind(tok::kw_int),
+                          AllOf(HasText("main"), Kind(tok::identifier)),
+                          Kind(tok::l_paren), Kind(tok::r_paren),
+                          Kind(tok::l_brace), Kind(tok::r_brace)));
+  // Comments are ignored for now.
+  EXPECT_THAT(tokenize("/* foo */int a; // more comments"),
+              ElementsAre(Kind(tok::kw_int),
+                          AllOf(HasText("a"), Kind(tok::identifier)),
+                          Kind(tok::semi)));
+}
+
+TEST_F(TokenCollectorTest, Basic) {
+  std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
+      {"int main() {}",
+       R"(expanded tokens:
+  int main ( ) { }
+file './input.cpp'
+  spelled tokens:
+    int main ( ) { }
+  no mappings.
+)"},
+      // All kinds of whitespace are ignored.
+      {"\t\n  int\t\n  main\t\n  (\t\n  )\t\n{\t\n  }\t\n",
+       R"(expanded tokens:
+  int main ( ) { }
+file './input.cpp'
+  spelled tokens:
+    int main ( ) { }
+  no mappings.
+)"},
+      // Annotation tokens are ignored.
+      {R"cpp(
+        #pragma GCC visibility push (public)
+        #pragma GCC visibility pop
+      )cpp",
+       R"(expanded tokens:
+  <empty>
+file './input.cpp'
+  spelled tokens:
+    # pragma GCC visibility push ( public ) # pragma GCC visibility pop
+  mappings:
+    ['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0)
+)"},
+      // Empty files should not crash.
+      {R"cpp()cpp", R"(expanded tokens:
+  <empty>
+file './input.cpp'
+  spelled tokens:
+    <empty>
+  no mappings.
+)"},
+      // Should not crash on errors inside '#define' directives. Error is that
+      // stringification (#B) does not refer to a macro parameter.
+      {
+          R"cpp(
+a
+#define MACRO() A #B
+)cpp",
+          R"(expanded tokens:
+  a
+file './input.cpp'
+  spelled tokens:
+    a # define MACRO ( ) A # B
+  mappings:
+    ['#'_1, '<eof>'_9) => ['<eof>'_1, '<eof>'_1)
+)"}};
+  for (auto &Test : TestCases)
+    EXPECT_EQ(collectAndDump(Test.first), Test.second)
+        << collectAndDump(Test.first);
+}
+
+TEST_F(TokenCollectorTest, Locations) {
+  // Check locations of the tokens.
+  llvm::Annotations Code(R"cpp(
+    $r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]]
+  )cpp");
+  recordTokens(Code.code());
+  // Check expanded tokens.
+  EXPECT_THAT(
+      Buffer.expandedTokens(),
+      ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
+                  AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
+                  AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
+                  AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
+                  AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))),
+                  Kind(tok::eof)));
+  // Check spelled tokens.
+  EXPECT_THAT(
+      Buffer.spelledTokens(SourceMgr->getMainFileID()),
+      ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
+                  AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
+                  AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
+                  AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
+                  AllOf(Kind(tok::semi), RangeIs(Code.range("r5")))));
+}
+
+TEST_F(TokenCollectorTest, MacroDirectives) {
+  // Macro directives are not stored anywhere at the moment.
+  std::string Code = R"cpp(
+    #define FOO a
+    #include "unresolved_file.h"
+    #undef FOO
+    #ifdef X
+    #else
+    #endif
+    #ifndef Y
+    #endif
+    #if 1
+    #elif 2
+    #else
+    #endif
+    #pragma once
+    #pragma something lalala
+
+    int a;
+  )cpp";
+  std::string Expected =
+      "expanded tokens:\n"
+      "  int a ;\n"
+      "file './input.cpp'\n"
+      "  spelled tokens:\n"
+      "    # define FOO a # include \"unresolved_file.h\" # undef FOO "
+      "# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else "
+      "# endif # pragma once # pragma something lalala int a ;\n"
+      "  mappings:\n"
+      "    ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n";
+  EXPECT_EQ(collectAndDump(Code), Expected);
+}
+
+TEST_F(TokenCollectorTest, MacroReplacements) {
+  std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
+      // A simple object-like macro.
+      {R"cpp(
+    #define INT int const
+    INT a;
+  )cpp",
+       R"(expanded tokens:
+  int const a ;
+file './input.cpp'
+  spelled tokens:
+    # define INT int const INT a ;
+  mappings:
+    ['#'_0, 'INT'_5) => ['int'_0, 'int'_0)
+    ['INT'_5, 'a'_6) => ['int'_0, 'a'_2)
+)"},
+      // A simple function-like macro.
+      {R"cpp(
+    #define INT(a) const int
+    INT(10+10) a;
+  )cpp",
+       R"(expanded tokens:
+  const int a ;
+file './input.cpp'
+  spelled tokens:
+    # define INT ( a ) const int INT ( 10 + 10 ) a ;
+  mappings:
+    ['#'_0, 'INT'_8) => ['const'_0, 'const'_0)
+    ['INT'_8, 'a'_14) => ['const'_0, 'a'_2)
+)"},
+      // Recursive macro replacements.
+      {R"cpp(
+    #define ID(X) X
+    #define INT int const
+    ID(ID(INT)) a;
+  )cpp",
+       R"(expanded tokens:
+  int const a ;
+file './input.cpp'
+  spelled tokens:
+    # define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ;
+  mappings:
+    ['#'_0, 'ID'_12) => ['int'_0, 'int'_0)
+    ['ID'_12, 'a'_19) => ['int'_0, 'a'_2)
+)"},
+      // A little more complicated recursive macro replacements.
+      {R"cpp(
+    #define ADD(X, Y) X+Y
+    #define MULT(X, Y) X*Y
+
+    int a = ADD(MULT(1,2), MULT(3,ADD(4,5)));
+  )cpp",
+       "expanded tokens:\n"
+       "  int a = 1 * 2 + 3 * 4 + 5 ;\n"
+       "file './input.cpp'\n"
+       "  spelled tokens:\n"
+       "    # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int "
+       "a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n"
+       "  mappings:\n"
+       "    ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n"
+       "    ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"},
+      // Empty macro replacement.
+      // FIXME: the #define directives should not be glued together.
+      {R"cpp(
+    #define EMPTY
+    #define EMPTY_FUNC(X)
+    EMPTY
+    EMPTY_FUNC(1+2+3)
+    )cpp",
+       R"(expanded tokens:
+  <empty>
+file './input.cpp'
+  spelled tokens:
+    # define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 )
+  mappings:
+    ['#'_0, 'EMPTY'_9) => ['<eof>'_0, '<eof>'_0)
+    ['EMPTY'_9, 'EMPTY_FUNC'_10) => ['<eof>'_0, '<eof>'_0)
+    ['EMPTY_FUNC'_10, '<eof>'_18) => ['<eof>'_0, '<eof>'_0)
+)"},
+      // File ends with a macro replacement.
+      {R"cpp(
+    #define FOO 10+10;
+    int a = FOO
+    )cpp",
+       R"(expanded tokens:
+  int a = 10 + 10 ;
+file './input.cpp'
+  spelled tokens:
+    # define FOO 10 + 10 ; int a = FOO
+  mappings:
+    ['#'_0, 'int'_7) => ['int'_0, 'int'_0)
+    ['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7)
+)"}};
+
+  for (auto &Test : TestCases)
+    EXPECT_EQ(Test.second, collectAndDump(Test.first))
+        << collectAndDump(Test.first);
+}
+
+TEST_F(TokenCollectorTest, SpecialTokens) {
+  // Tokens coming from concatenations.
+  recordTokens(R"cpp(
+    #define CONCAT(a, b) a ## b
+    int a = CONCAT(1, 2);
+  )cpp");
+  EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
+              Contains(HasText("12")));
+  // Multi-line tokens with slashes at the end.
+  recordTokens("i\\\nn\\\nt");
+  EXPECT_THAT(Buffer.expandedTokens(),
+              ElementsAre(AllOf(Kind(tok::kw_int), HasText("i\\\nn\\\nt")),
+                          Kind(tok::eof)));
+  // FIXME: test tokens with digraphs and UCN identifiers.
+}
+
+TEST_F(TokenCollectorTest, LateBoundTokens) {
+  // The parser eventually breaks the first '>>' into two tokens ('>' and '>'),
+  // but we choose to record them as a single token (for now).
+  llvm::Annotations Code(R"cpp(
+    template <class T>
+    struct foo { int a; };
+    int bar = foo<foo<int$br[[>>]]().a;
+    int baz = 10 $op[[>>]] 2;
+  )cpp");
+  recordTokens(Code.code());
+  EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
+              AllOf(Contains(AllOf(Kind(tok::greatergreater),
+                                   RangeIs(Code.range("br")))),
+                    Contains(AllOf(Kind(tok::greatergreater),
+                                   RangeIs(Code.range("op"))))));
+}
+
+TEST_F(TokenCollectorTest, DelayedParsing) {
+  llvm::StringLiteral Code = R"cpp(
+    struct Foo {
+      int method() {
+        // Parser will visit method bodies and initializers multiple times, but
+        // TokenBuffer should only record the first walk over the tokens;
+        return 100;
+      }
+      int a = 10;
+
+      struct Subclass {
+        void foo() {
+          Foo().method();
+        }
+      };
+    };
+  )cpp";
+  std::string ExpectedTokens =
+      "expanded tokens:\n"
+      "  struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct "
+      "Subclass { void foo ( ) { Foo ( ) . method ( ) ; } } ; } ;\n";
+  EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens));
+}
+
+TEST_F(TokenCollectorTest, MultiFile) {
+  addFile("./foo.h", R"cpp(
+    #define ADD(X, Y) X+Y
+    int a = 100;
+    #include "bar.h"
+  )cpp");
+  addFile("./bar.h", R"cpp(
+    int b = ADD(1, 2);
+    #define MULT(X, Y) X*Y
+  )cpp");
+  llvm::StringLiteral Code = R"cpp(
+    #include "foo.h"
+    int c = ADD(1, MULT(2,3));
+  )cpp";
+
+  std::string Expected = R"(expanded tokens:
+  int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ;
+file './input.cpp'
+  spelled tokens:
+    # include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ;
+  mappings:
+    ['#'_0, 'int'_3) => ['int'_12, 'int'_12)
+    ['ADD'_6, ';'_17) => ['1'_15, ';'_20)
+file './foo.h'
+  spelled tokens:
+    # define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h"
+  mappings:
+    ['#'_0, 'int'_11) => ['int'_0, 'int'_0)
+    ['#'_16, '<eof>'_19) => ['int'_5, 'int'_5)
+file './bar.h'
+  spelled tokens:
+    int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y
+  mappings:
+    ['ADD'_3, ';'_9) => ['1'_8, ';'_11)
+    ['#'_10, '<eof>'_21) => ['int'_12, 'int'_12)
+)";
+
+  EXPECT_EQ(Expected, collectAndDump(Code))
+      << "input: " << Code << "\nresults: " << collectAndDump(Code);
+}
+
+class TokenBufferTest : public TokenCollectorTest {};
+
+TEST_F(TokenBufferTest, SpelledByExpanded) {
+  recordTokens(R"cpp(
+    a1 a2 a3 b1 b2
+  )cpp");
+
+  // Sanity check: expanded and spelled tokens are stored separately.
+  EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
+  // Searching for subranges of expanded tokens should give the corresponding
+  // spelled ones.
+  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 b1 b2")),
+              ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2"))));
+  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
+              ValueIs(SameRange(findSpelled("a1 a2 a3"))));
+  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
+              ValueIs(SameRange(findSpelled("b1 b2"))));
+
+  // Test search on simple macro expansions.
+  recordTokens(R"cpp(
+    #define A a1 a2 a3
+    #define B b1 b2
+
+    A split B
+  )cpp");
+  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
+              ValueIs(SameRange(findSpelled("A split B"))));
+  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
+              ValueIs(SameRange(findSpelled("A split").drop_back())));
+  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
+              ValueIs(SameRange(findSpelled("split B").drop_front())));
+  // Ranges not fully covering macro invocations should fail.
+  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None);
+  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2")), llvm::None);
+  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1 b2")),
+            llvm::None);
+
+  // Recursive macro invocations.
+  recordTokens(R"cpp(
+    #define ID(x) x
+    #define B b1 b2
+
+    ID(ID(ID(a1) a2 a3)) split ID(B)
+  )cpp");
+
+  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
+              ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )"))));
+  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
+              ValueIs(SameRange(findSpelled("ID ( B )"))));
+  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
+              ValueIs(SameRange(findSpelled(
+                  "ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )"))));
+  // Ranges crossing macro call boundaries.
+  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1")),
+            llvm::None);
+  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1")),
+            llvm::None);
+  // FIXME: next two examples should map to macro arguments, but currently they
+  //        fail.
+  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2")), llvm::None);
+  EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None);
+
+  // Empty macro expansions.
+  recordTokens(R"cpp(
+    #define EMPTY
+    #define ID(X) X
+
+    EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
+    EMPTY EMPTY ID(4 5 6) split2
+    ID(7 8 9) EMPTY EMPTY
+  )cpp");
+  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3")),
+              ValueIs(SameRange(findSpelled("ID ( 1 2 3 )"))));
+  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6")),
+              ValueIs(SameRange(findSpelled("ID ( 4 5 6 )"))));
+  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9")),
+              ValueIs(SameRange(findSpelled("ID ( 7 8 9 )"))));
+
+  // Empty mappings coming from various directives.
+  recordTokens(R"cpp(
+    #define ID(X) X
+    ID(1)
+    #pragma lalala
+    not_mapped
+  )cpp");
+  EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped")),
+              ValueIs(SameRange(findSpelled("not_mapped"))));
+}
+
+TEST_F(TokenBufferTest, ExpandedTokensForRange) {
+  recordTokens(R"cpp(
+    #define SIGN(X) X##_washere
+    A SIGN(B) C SIGN(D) E SIGN(F) G
+  )cpp");
+
+  SourceRange R(findExpanded("C").front().location(),
+                findExpanded("F_washere").front().location());
+  // Sanity check: expanded and spelled tokens are stored separately.
+  EXPECT_THAT(Buffer.expandedTokens(R),
+              SameRange(findExpanded("C D_washere E F_washere")));
+  EXPECT_THAT(Buffer.expandedTokens(SourceRange()), testing::IsEmpty());
+}
+
+TEST_F(TokenBufferTest, ExpansionStartingAt) {
+  // Object-like macro expansions.
+  recordTokens(R"cpp(
+    #define FOO 3+4
+    int a = FOO 1;
+    int b = FOO 2;
+  )cpp");
+
+  llvm::ArrayRef<syntax::Token> Foo1 = findSpelled("FOO 1").drop_back();
+  EXPECT_THAT(
+      Buffer.expansionStartingAt(Foo1.data()),
+      ValueIs(IsExpansion(SameRange(Foo1),
+                          SameRange(findExpanded("3 + 4 1").drop_back()))));
+
+  llvm::ArrayRef<syntax::Token> Foo2 = findSpelled("FOO 2").drop_back();
+  EXPECT_THAT(
+      Buffer.expansionStartingAt(Foo2.data()),
+      ValueIs(IsExpansion(SameRange(Foo2),
+                          SameRange(findExpanded("3 + 4 2").drop_back()))));
+
+  // Function-like macro expansions.
+  recordTokens(R"cpp(
+    #define ID(X) X
+    int a = ID(1+2+3);
+    int b = ID(ID(2+3+4));
+  )cpp");
+
+  llvm::ArrayRef<syntax::Token> ID1 = findSpelled("ID ( 1 + 2 + 3 )");
+  EXPECT_THAT(Buffer.expansionStartingAt(&ID1.front()),
+              ValueIs(IsExpansion(SameRange(ID1),
+                                  SameRange(findExpanded("1 + 2 + 3")))));
+  // Only the first spelled token should be found.
+  for (const auto &T : ID1.drop_front())
+    EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
+
+  llvm::ArrayRef<syntax::Token> ID2 = findSpelled("ID ( ID ( 2 + 3 + 4 ) )");
+  EXPECT_THAT(Buffer.expansionStartingAt(&ID2.front()),
+              ValueIs(IsExpansion(SameRange(ID2),
+                                  SameRange(findExpanded("2 + 3 + 4")))));
+  // Only the first spelled token should be found.
+  for (const auto &T : ID2.drop_front())
+    EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
+
+  // PP directives.
+  recordTokens(R"cpp(
+#define FOO 1
+int a = FOO;
+#pragma once
+int b = 1;
+  )cpp");
+
+  llvm::ArrayRef<syntax::Token> DefineFoo = findSpelled("# define FOO 1");
+  EXPECT_THAT(
+      Buffer.expansionStartingAt(&DefineFoo.front()),
+      ValueIs(IsExpansion(SameRange(DefineFoo),
+                          SameRange(findExpanded("int a").take_front(0)))));
+  // Only the first spelled token should be found.
+  for (const auto &T : DefineFoo.drop_front())
+    EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
+
+  llvm::ArrayRef<syntax::Token> PragmaOnce = findSpelled("# pragma once");
+  EXPECT_THAT(
+      Buffer.expansionStartingAt(&PragmaOnce.front()),
+      ValueIs(IsExpansion(SameRange(PragmaOnce),
+                          SameRange(findExpanded("int b").take_front(0)))));
+  // Only the first spelled token should be found.
+  for (const auto &T : PragmaOnce.drop_front())
+    EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
+}
+
+TEST_F(TokenBufferTest, TokensToFileRange) {
+  addFile("./foo.h", "token_from_header");
+  llvm::Annotations Code(R"cpp(
+    #define FOO token_from_expansion
+    #include "./foo.h"
+    $all[[$i[[int]] a = FOO;]]
+  )cpp");
+  recordTokens(Code.code());
+
+  auto &SM = *SourceMgr;
+
+  // Two simple examples.
+  auto Int = findExpanded("int").front();
+  auto Semi = findExpanded(";").front();
+  EXPECT_EQ(Int.range(SM), FileRange(SM.getMainFileID(), Code.range("i").Begin,
+                                     Code.range("i").End));
+  EXPECT_EQ(syntax::Token::range(SM, Int, Semi),
+            FileRange(SM.getMainFileID(), Code.range("all").Begin,
+                      Code.range("all").End));
+  // We don't test assertion failures because death tests are slow.
+}
+
+TEST_F(TokenBufferTest, MacroExpansions) {
+  llvm::Annotations Code(R"cpp(
+    #define FOO B
+    #define FOO2 BA
+    #define CALL(X) int X
+    #define G CALL(FOO2)
+    int B;
+    $macro[[FOO]];
+    $macro[[CALL]](A);
+    $macro[[G]];
+  )cpp");
+  recordTokens(Code.code());
+  auto &SM = *SourceMgr;
+  auto Expansions = Buffer.macroExpansions(SM.getMainFileID());
+  std::vector<FileRange> ExpectedMacroRanges;
+  for (auto Range : Code.ranges("macro"))
+    ExpectedMacroRanges.push_back(
+        FileRange(SM.getMainFileID(), Range.Begin, Range.End));
+  std::vector<FileRange> ActualMacroRanges;
+  for (auto Expansion : Expansions)
+    ActualMacroRanges.push_back(Expansion->range(SM));
+  EXPECT_EQ(ExpectedMacroRanges, ActualMacroRanges);
+}
+
+TEST_F(TokenBufferTest, Touching) {
+  llvm::Annotations Code("^i^nt^ ^a^b^=^1;^");
+  recordTokens(Code.code());
+
+  auto Touching = [&](int Index) {
+    SourceLocation Loc = SourceMgr->getComposedLoc(SourceMgr->getMainFileID(),
+                                                   Code.points()[Index]);
+    return spelledTokensTouching(Loc, Buffer);
+  };
+  auto Identifier = [&](int Index) {
+    SourceLocation Loc = SourceMgr->getComposedLoc(SourceMgr->getMainFileID(),
+                                                   Code.points()[Index]);
+    const syntax::Token *Tok = spelledIdentifierTouching(Loc, Buffer);
+    return Tok ? Tok->text(*SourceMgr) : "";
+  };
+
+  EXPECT_THAT(Touching(0), SameRange(findSpelled("int")));
+  EXPECT_EQ(Identifier(0), "");
+  EXPECT_THAT(Touching(1), SameRange(findSpelled("int")));
+  EXPECT_EQ(Identifier(1), "");
+  EXPECT_THAT(Touching(2), SameRange(findSpelled("int")));
+  EXPECT_EQ(Identifier(2), "");
+
+  EXPECT_THAT(Touching(3), SameRange(findSpelled("ab")));
+  EXPECT_EQ(Identifier(3), "ab");
+  EXPECT_THAT(Touching(4), SameRange(findSpelled("ab")));
+  EXPECT_EQ(Identifier(4), "ab");
+
+  EXPECT_THAT(Touching(5), SameRange(findSpelled("ab =")));
+  EXPECT_EQ(Identifier(5), "ab");
+
+  EXPECT_THAT(Touching(6), SameRange(findSpelled("= 1")));
+  EXPECT_EQ(Identifier(6), "");
+
+  EXPECT_THAT(Touching(7), SameRange(findSpelled(";")));
+  EXPECT_EQ(Identifier(7), "");
+
+  ASSERT_EQ(Code.points().size(), 8u);
+}
+
+} // namespace