diff options
Diffstat (limited to 'gnu/llvm/tools/clang/lib/Format')
30 files changed, 0 insertions, 18956 deletions
diff --git a/gnu/llvm/tools/clang/lib/Format/AffectedRangeManager.cpp b/gnu/llvm/tools/clang/lib/Format/AffectedRangeManager.cpp deleted file mode 100644 index b14316a14cd..00000000000 --- a/gnu/llvm/tools/clang/lib/Format/AffectedRangeManager.cpp +++ /dev/null @@ -1,156 +0,0 @@ -//===--- AffectedRangeManager.cpp - Format C++ code -----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file implements AffectRangeManager class. -/// -//===----------------------------------------------------------------------===// - -#include "AffectedRangeManager.h" - -#include "FormatToken.h" -#include "TokenAnnotator.h" - -namespace clang { -namespace format { - -bool AffectedRangeManager::computeAffectedLines( - SmallVectorImpl<AnnotatedLine *> &Lines) { - SmallVectorImpl<AnnotatedLine *>::iterator I = Lines.begin(); - SmallVectorImpl<AnnotatedLine *>::iterator E = Lines.end(); - bool SomeLineAffected = false; - const AnnotatedLine *PreviousLine = nullptr; - while (I != E) { - AnnotatedLine *Line = *I; - Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First); - - // If a line is part of a preprocessor directive, it needs to be formatted - // if any token within the directive is affected. - if (Line->InPPDirective) { - FormatToken *Last = Line->Last; - SmallVectorImpl<AnnotatedLine *>::iterator PPEnd = I + 1; - while (PPEnd != E && !(*PPEnd)->First->HasUnescapedNewline) { - Last = (*PPEnd)->Last; - ++PPEnd; - } - - if (affectsTokenRange(*Line->First, *Last, - /*IncludeLeadingNewlines=*/false)) { - SomeLineAffected = true; - markAllAsAffected(I, PPEnd); - } - I = PPEnd; - continue; - } - - if (nonPPLineAffected(Line, PreviousLine, Lines)) - SomeLineAffected = true; - - PreviousLine = Line; - ++I; - } - return SomeLineAffected; -} - -bool AffectedRangeManager::affectsCharSourceRange( - const CharSourceRange &Range) { - for (SmallVectorImpl<CharSourceRange>::const_iterator I = Ranges.begin(), - E = Ranges.end(); - I != E; ++I) { - if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), I->getBegin()) && - !SourceMgr.isBeforeInTranslationUnit(I->getEnd(), Range.getBegin())) - return true; - } - return false; -} - -bool AffectedRangeManager::affectsTokenRange(const FormatToken &First, - const FormatToken &Last, - bool IncludeLeadingNewlines) { - SourceLocation Start = First.WhitespaceRange.getBegin(); - if (!IncludeLeadingNewlines) - Start = Start.getLocWithOffset(First.LastNewlineOffset); - SourceLocation End = Last.getStartOfNonWhitespace(); - End = End.getLocWithOffset(Last.TokenText.size()); - CharSourceRange Range = CharSourceRange::getCharRange(Start, End); - return affectsCharSourceRange(Range); -} - -bool AffectedRangeManager::affectsLeadingEmptyLines(const FormatToken &Tok) { - CharSourceRange EmptyLineRange = CharSourceRange::getCharRange( - Tok.WhitespaceRange.getBegin(), - Tok.WhitespaceRange.getBegin().getLocWithOffset(Tok.LastNewlineOffset)); - return affectsCharSourceRange(EmptyLineRange); -} - -void AffectedRangeManager::markAllAsAffected( - SmallVectorImpl<AnnotatedLine *>::iterator I, - SmallVectorImpl<AnnotatedLine *>::iterator E) { - while (I != E) { - (*I)->Affected = true; - markAllAsAffected((*I)->Children.begin(), (*I)->Children.end()); - ++I; - } -} - -bool AffectedRangeManager::nonPPLineAffected( - AnnotatedLine *Line, const AnnotatedLine *PreviousLine, - SmallVectorImpl<AnnotatedLine *> &Lines) { - bool SomeLineAffected = false; - Line->ChildrenAffected = computeAffectedLines(Line->Children); - if (Line->ChildrenAffected) - SomeLineAffected = true; - - // Stores whether one of the line's tokens is directly affected. - bool SomeTokenAffected = false; - // Stores whether we need to look at the leading newlines of the next token - // in order to determine whether it was affected. - bool IncludeLeadingNewlines = false; - - // Stores whether the first child line of any of this line's tokens is - // affected. - bool SomeFirstChildAffected = false; - - for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) { - // Determine whether 'Tok' was affected. - if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines)) - SomeTokenAffected = true; - - // Determine whether the first child of 'Tok' was affected. - if (!Tok->Children.empty() && Tok->Children.front()->Affected) - SomeFirstChildAffected = true; - - IncludeLeadingNewlines = Tok->Children.empty(); - } - - // Was this line moved, i.e. has it previously been on the same line as an - // affected line? - bool LineMoved = PreviousLine && PreviousLine->Affected && - Line->First->NewlinesBefore == 0; - - bool IsContinuedComment = - Line->First->is(tok::comment) && Line->First->Next == nullptr && - Line->First->NewlinesBefore < 2 && PreviousLine && - PreviousLine->Affected && PreviousLine->Last->is(tok::comment); - - bool IsAffectedClosingBrace = - Line->First->is(tok::r_brace) && - Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && - Lines[Line->MatchingOpeningBlockLineIndex]->Affected; - - if (SomeTokenAffected || SomeFirstChildAffected || LineMoved || - IsContinuedComment || IsAffectedClosingBrace) { - Line->Affected = true; - SomeLineAffected = true; - } - return SomeLineAffected; -} - -} // namespace format -} // namespace clang diff --git a/gnu/llvm/tools/clang/lib/Format/AffectedRangeManager.h b/gnu/llvm/tools/clang/lib/Format/AffectedRangeManager.h deleted file mode 100644 index b0c9dd259fb..00000000000 --- a/gnu/llvm/tools/clang/lib/Format/AffectedRangeManager.h +++ /dev/null @@ -1,66 +0,0 @@ -//===--- AffectedRangeManager.h - Format C++ code ---------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// AffectedRangeManager class manages affected ranges in the code. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LIB_FORMAT_AFFECTEDRANGEMANAGER_H -#define LLVM_CLANG_LIB_FORMAT_AFFECTEDRANGEMANAGER_H - -#include "clang/Basic/SourceManager.h" - -namespace clang { -namespace format { - -struct FormatToken; -class AnnotatedLine; - -class AffectedRangeManager { -public: - AffectedRangeManager(const SourceManager &SourceMgr, - const ArrayRef<CharSourceRange> Ranges) - : SourceMgr(SourceMgr), Ranges(Ranges.begin(), Ranges.end()) {} - - // Determines which lines are affected by the SourceRanges given as input. - // Returns \c true if at least one line in \p Lines or one of their - // children is affected. - bool computeAffectedLines(SmallVectorImpl<AnnotatedLine *> &Lines); - - // Returns true if 'Range' intersects with one of the input ranges. - bool affectsCharSourceRange(const CharSourceRange &Range); - -private: - // Returns true if the range from 'First' to 'Last' intersects with one of the - // input ranges. - bool affectsTokenRange(const FormatToken &First, const FormatToken &Last, - bool IncludeLeadingNewlines); - - // Returns true if one of the input ranges intersect the leading empty lines - // before 'Tok'. - bool affectsLeadingEmptyLines(const FormatToken &Tok); - - // Marks all lines between I and E as well as all their children as affected. - void markAllAsAffected(SmallVectorImpl<AnnotatedLine *>::iterator I, - SmallVectorImpl<AnnotatedLine *>::iterator E); - - // Determines whether 'Line' is affected by the SourceRanges given as input. - // Returns \c true if line or one if its children is affected. - bool nonPPLineAffected(AnnotatedLine *Line, const AnnotatedLine *PreviousLine, - SmallVectorImpl<AnnotatedLine *> &Lines); - - const SourceManager &SourceMgr; - const SmallVector<CharSourceRange, 8> Ranges; -}; - -} // namespace format -} // namespace clang - -#endif // LLVM_CLANG_LIB_FORMAT_AFFECTEDRANGEMANAGER_H diff --git a/gnu/llvm/tools/clang/lib/Format/BreakableToken.cpp b/gnu/llvm/tools/clang/lib/Format/BreakableToken.cpp deleted file mode 100644 index e6ce01b520b..00000000000 --- a/gnu/llvm/tools/clang/lib/Format/BreakableToken.cpp +++ /dev/null @@ -1,968 +0,0 @@ -//===--- BreakableToken.cpp - Format C++ code -----------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Contains implementation of BreakableToken class and classes derived -/// from it. -/// -//===----------------------------------------------------------------------===// - -#include "BreakableToken.h" -#include "ContinuationIndenter.h" -#include "clang/Basic/CharInfo.h" -#include "clang/Format/Format.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Debug.h" -#include <algorithm> - -#define DEBUG_TYPE "format-token-breaker" - -namespace clang { -namespace format { - -static const char *const Blanks = " \t\v\f\r"; -static bool IsBlank(char C) { - switch (C) { - case ' ': - case '\t': - case '\v': - case '\f': - case '\r': - return true; - default: - return false; - } -} - -static StringRef getLineCommentIndentPrefix(StringRef Comment, - const FormatStyle &Style) { - static const char *const KnownCStylePrefixes[] = {"///<", "//!<", "///", "//", - "//!"}; - static const char *const KnownTextProtoPrefixes[] = {"//", "#", "##", "###", - "####"}; - ArrayRef<const char *> KnownPrefixes(KnownCStylePrefixes); - if (Style.Language == FormatStyle::LK_TextProto) - KnownPrefixes = KnownTextProtoPrefixes; - - StringRef LongestPrefix; - for (StringRef KnownPrefix : KnownPrefixes) { - if (Comment.startswith(KnownPrefix)) { - size_t PrefixLength = KnownPrefix.size(); - while (PrefixLength < Comment.size() && Comment[PrefixLength] == ' ') - ++PrefixLength; - if (PrefixLength > LongestPrefix.size()) - LongestPrefix = Comment.substr(0, PrefixLength); - } - } - return LongestPrefix; -} - -static BreakableToken::Split getCommentSplit(StringRef Text, - unsigned ContentStartColumn, - unsigned ColumnLimit, - unsigned TabWidth, - encoding::Encoding Encoding, - const FormatStyle &Style) { - LLVM_DEBUG(llvm::dbgs() << "Comment split: \"" << Text - << "\", Column limit: " << ColumnLimit - << ", Content start: " << ContentStartColumn << "\n"); - if (ColumnLimit <= ContentStartColumn + 1) - return BreakableToken::Split(StringRef::npos, 0); - - unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1; - unsigned MaxSplitBytes = 0; - - for (unsigned NumChars = 0; - NumChars < MaxSplit && MaxSplitBytes < Text.size();) { - unsigned BytesInChar = - encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding); - NumChars += - encoding::columnWidthWithTabs(Text.substr(MaxSplitBytes, BytesInChar), - ContentStartColumn, TabWidth, Encoding); - MaxSplitBytes += BytesInChar; - } - - StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes); - - static auto *const kNumberedListRegexp = new llvm::Regex("^[1-9][0-9]?\\."); - while (SpaceOffset != StringRef::npos) { - // Do not split before a number followed by a dot: this would be interpreted - // as a numbered list, which would prevent re-flowing in subsequent passes. - if (kNumberedListRegexp->match(Text.substr(SpaceOffset).ltrim(Blanks))) - SpaceOffset = Text.find_last_of(Blanks, SpaceOffset); - // In JavaScript, some @tags can be followed by {, and machinery that parses - // these comments will fail to understand the comment if followed by a line - // break. So avoid ever breaking before a {. - else if (Style.Language == FormatStyle::LK_JavaScript && - SpaceOffset + 1 < Text.size() && Text[SpaceOffset + 1] == '{') - SpaceOffset = Text.find_last_of(Blanks, SpaceOffset); - else - break; - } - - if (SpaceOffset == StringRef::npos || - // Don't break at leading whitespace. - Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) { - // Make sure that we don't break at leading whitespace that - // reaches past MaxSplit. - StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks); - if (FirstNonWhitespace == StringRef::npos) - // If the comment is only whitespace, we cannot split. - return BreakableToken::Split(StringRef::npos, 0); - SpaceOffset = Text.find_first_of( - Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace)); - } - if (SpaceOffset != StringRef::npos && SpaceOffset != 0) { - // adaptStartOfLine will break after lines starting with /** if the comment - // is broken anywhere. Avoid emitting this break twice here. - // Example: in /** longtextcomesherethatbreaks */ (with ColumnLimit 20) will - // insert a break after /**, so this code must not insert the same break. - if (SpaceOffset == 1 && Text[SpaceOffset - 1] == '*') - return BreakableToken::Split(StringRef::npos, 0); - StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks); - StringRef AfterCut = Text.substr(SpaceOffset).ltrim(Blanks); - return BreakableToken::Split(BeforeCut.size(), - AfterCut.begin() - BeforeCut.end()); - } - return BreakableToken::Split(StringRef::npos, 0); -} - -static BreakableToken::Split -getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit, - unsigned TabWidth, encoding::Encoding Encoding) { - // FIXME: Reduce unit test case. - if (Text.empty()) - return BreakableToken::Split(StringRef::npos, 0); - if (ColumnLimit <= UsedColumns) - return BreakableToken::Split(StringRef::npos, 0); - unsigned MaxSplit = ColumnLimit - UsedColumns; - StringRef::size_type SpaceOffset = 0; - StringRef::size_type SlashOffset = 0; - StringRef::size_type WordStartOffset = 0; - StringRef::size_type SplitPoint = 0; - for (unsigned Chars = 0;;) { - unsigned Advance; - if (Text[0] == '\\') { - Advance = encoding::getEscapeSequenceLength(Text); - Chars += Advance; - } else { - Advance = encoding::getCodePointNumBytes(Text[0], Encoding); - Chars += encoding::columnWidthWithTabs( - Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding); - } - - if (Chars > MaxSplit || Text.size() <= Advance) - break; - - if (IsBlank(Text[0])) - SpaceOffset = SplitPoint; - if (Text[0] == '/') - SlashOffset = SplitPoint; - if (Advance == 1 && !isAlphanumeric(Text[0])) - WordStartOffset = SplitPoint; - - SplitPoint += Advance; - Text = Text.substr(Advance); - } - - if (SpaceOffset != 0) - return BreakableToken::Split(SpaceOffset + 1, 0); - if (SlashOffset != 0) - return BreakableToken::Split(SlashOffset + 1, 0); - if (WordStartOffset != 0) - return BreakableToken::Split(WordStartOffset + 1, 0); - if (SplitPoint != 0) - return BreakableToken::Split(SplitPoint, 0); - return BreakableToken::Split(StringRef::npos, 0); -} - -bool switchesFormatting(const FormatToken &Token) { - assert((Token.is(TT_BlockComment) || Token.is(TT_LineComment)) && - "formatting regions are switched by comment tokens"); - StringRef Content = Token.TokenText.substr(2).ltrim(); - return Content.startswith("clang-format on") || - Content.startswith("clang-format off"); -} - -unsigned -BreakableToken::getLengthAfterCompression(unsigned RemainingTokenColumns, - Split Split) const { - // Example: consider the content - // lala lala - // - RemainingTokenColumns is the original number of columns, 10; - // - Split is (4, 2), denoting the two spaces between the two words; - // - // We compute the number of columns when the split is compressed into a single - // space, like: - // lala lala - // - // FIXME: Correctly measure the length of whitespace in Split.second so it - // works with tabs. - return RemainingTokenColumns + 1 - Split.second; -} - -unsigned BreakableStringLiteral::getLineCount() const { return 1; } - -unsigned BreakableStringLiteral::getRangeLength(unsigned LineIndex, - unsigned Offset, - StringRef::size_type Length, - unsigned StartColumn) const { - llvm_unreachable("Getting the length of a part of the string literal " - "indicates that the code tries to reflow it."); -} - -unsigned -BreakableStringLiteral::getRemainingLength(unsigned LineIndex, unsigned Offset, - unsigned StartColumn) const { - return UnbreakableTailLength + Postfix.size() + - encoding::columnWidthWithTabs(Line.substr(Offset, StringRef::npos), - StartColumn, Style.TabWidth, Encoding); -} - -unsigned BreakableStringLiteral::getContentStartColumn(unsigned LineIndex, - bool Break) const { - return StartColumn + Prefix.size(); -} - -BreakableStringLiteral::BreakableStringLiteral( - const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, - StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective, - encoding::Encoding Encoding, const FormatStyle &Style) - : BreakableToken(Tok, InPPDirective, Encoding, Style), - StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix), - UnbreakableTailLength(UnbreakableTailLength) { - assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix)); - Line = Tok.TokenText.substr( - Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size()); -} - -BreakableToken::Split BreakableStringLiteral::getSplit( - unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, - unsigned ContentStartColumn, llvm::Regex &CommentPragmasRegex) const { - return getStringSplit(Line.substr(TailOffset), ContentStartColumn, - ColumnLimit - Postfix.size(), Style.TabWidth, Encoding); -} - -void BreakableStringLiteral::insertBreak(unsigned LineIndex, - unsigned TailOffset, Split Split, - unsigned ContentIndent, - WhitespaceManager &Whitespaces) const { - Whitespaces.replaceWhitespaceInToken( - Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix, - Prefix, InPPDirective, 1, StartColumn); -} - -BreakableComment::BreakableComment(const FormatToken &Token, - unsigned StartColumn, bool InPPDirective, - encoding::Encoding Encoding, - const FormatStyle &Style) - : BreakableToken(Token, InPPDirective, Encoding, Style), - StartColumn(StartColumn) {} - -unsigned BreakableComment::getLineCount() const { return Lines.size(); } - -BreakableToken::Split -BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit, unsigned ContentStartColumn, - llvm::Regex &CommentPragmasRegex) const { - // Don't break lines matching the comment pragmas regex. - if (CommentPragmasRegex.match(Content[LineIndex])) - return Split(StringRef::npos, 0); - return getCommentSplit(Content[LineIndex].substr(TailOffset), - ContentStartColumn, ColumnLimit, Style.TabWidth, - Encoding, Style); -} - -void BreakableComment::compressWhitespace( - unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) const { - StringRef Text = Content[LineIndex].substr(TailOffset); - // Text is relative to the content line, but Whitespaces operates relative to - // the start of the corresponding token, so compute the start of the Split - // that needs to be compressed into a single space relative to the start of - // its token. - unsigned BreakOffsetInToken = - Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first; - unsigned CharsToRemove = Split.second; - Whitespaces.replaceWhitespaceInToken( - tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", "", - /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1); -} - -const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const { - return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok; -} - -static bool mayReflowContent(StringRef Content) { - Content = Content.trim(Blanks); - // Lines starting with '@' commonly have special meaning. - // Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists. - bool hasSpecialMeaningPrefix = false; - for (StringRef Prefix : - {"@", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* "}) { - if (Content.startswith(Prefix)) { - hasSpecialMeaningPrefix = true; - break; - } - } - - // Numbered lists may also start with a number followed by '.' - // To avoid issues if a line starts with a number which is actually the end - // of a previous line, we only consider numbers with up to 2 digits. - static auto *const kNumberedListRegexp = new llvm::Regex("^[1-9][0-9]?\\. "); - hasSpecialMeaningPrefix = - hasSpecialMeaningPrefix || kNumberedListRegexp->match(Content); - - // Simple heuristic for what to reflow: content should contain at least two - // characters and either the first or second character must be - // non-punctuation. - return Content.size() >= 2 && !hasSpecialMeaningPrefix && - !Content.endswith("\\") && - // Note that this is UTF-8 safe, since if isPunctuation(Content[0]) is - // true, then the first code point must be 1 byte long. - (!isPunctuation(Content[0]) || !isPunctuation(Content[1])); -} - -BreakableBlockComment::BreakableBlockComment( - const FormatToken &Token, unsigned StartColumn, - unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, - encoding::Encoding Encoding, const FormatStyle &Style) - : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style), - DelimitersOnNewline(false), - UnbreakableTailLength(Token.UnbreakableTailLength) { - assert(Tok.is(TT_BlockComment) && - "block comment section must start with a block comment"); - - StringRef TokenText(Tok.TokenText); - assert(TokenText.startswith("/*") && TokenText.endswith("*/")); - TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n"); - - int IndentDelta = StartColumn - OriginalStartColumn; - Content.resize(Lines.size()); - Content[0] = Lines[0]; - ContentColumn.resize(Lines.size()); - // Account for the initial '/*'. - ContentColumn[0] = StartColumn + 2; - Tokens.resize(Lines.size()); - for (size_t i = 1; i < Lines.size(); ++i) - adjustWhitespace(i, IndentDelta); - - // Align decorations with the column of the star on the first line, - // that is one column after the start "/*". - DecorationColumn = StartColumn + 1; - - // Account for comment decoration patterns like this: - // - // /* - // ** blah blah blah - // */ - if (Lines.size() >= 2 && Content[1].startswith("**") && - static_cast<unsigned>(ContentColumn[1]) == StartColumn) { - DecorationColumn = StartColumn; - } - - Decoration = "* "; - if (Lines.size() == 1 && !FirstInLine) { - // Comments for which FirstInLine is false can start on arbitrary column, - // and available horizontal space can be too small to align consecutive - // lines with the first one. - // FIXME: We could, probably, align them to current indentation level, but - // now we just wrap them without stars. - Decoration = ""; - } - for (size_t i = 1, e = Lines.size(); i < e && !Decoration.empty(); ++i) { - // If the last line is empty, the closing "*/" will have a star. - if (i + 1 == e && Content[i].empty()) - break; - if (!Content[i].empty() && i + 1 != e && Decoration.startswith(Content[i])) - continue; - while (!Content[i].startswith(Decoration)) - Decoration = Decoration.substr(0, Decoration.size() - 1); - } - - LastLineNeedsDecoration = true; - IndentAtLineBreak = ContentColumn[0] + 1; - for (size_t i = 1, e = Lines.size(); i < e; ++i) { - if (Content[i].empty()) { - if (i + 1 == e) { - // Empty last line means that we already have a star as a part of the - // trailing */. We also need to preserve whitespace, so that */ is - // correctly indented. - LastLineNeedsDecoration = false; - // Align the star in the last '*/' with the stars on the previous lines. - if (e >= 2 && !Decoration.empty()) { - ContentColumn[i] = DecorationColumn; - } - } else if (Decoration.empty()) { - // For all other lines, set the start column to 0 if they're empty, so - // we do not insert trailing whitespace anywhere. - ContentColumn[i] = 0; - } - continue; - } - - // The first line already excludes the star. - // The last line excludes the star if LastLineNeedsDecoration is false. - // For all other lines, adjust the line to exclude the star and - // (optionally) the first whitespace. - unsigned DecorationSize = Decoration.startswith(Content[i]) - ? Content[i].size() - : Decoration.size(); - if (DecorationSize) { - ContentColumn[i] = DecorationColumn + DecorationSize; - } - Content[i] = Content[i].substr(DecorationSize); - if (!Decoration.startswith(Content[i])) - IndentAtLineBreak = - std::min<int>(IndentAtLineBreak, std::max(0, ContentColumn[i])); - } - IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size()); - - // Detect a multiline jsdoc comment and set DelimitersOnNewline in that case. - if (Style.Language == FormatStyle::LK_JavaScript || - Style.Language == FormatStyle::LK_Java) { - if ((Lines[0] == "*" || Lines[0].startswith("* ")) && Lines.size() > 1) { - // This is a multiline jsdoc comment. - DelimitersOnNewline = true; - } else if (Lines[0].startswith("* ") && Lines.size() == 1) { - // Detect a long single-line comment, like: - // /** long long long */ - // Below, '2' is the width of '*/'. - unsigned EndColumn = - ContentColumn[0] + - encoding::columnWidthWithTabs(Lines[0], ContentColumn[0], - Style.TabWidth, Encoding) + - 2; - DelimitersOnNewline = EndColumn > Style.ColumnLimit; - } - } - - LLVM_DEBUG({ - llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n"; - llvm::dbgs() << "DelimitersOnNewline " << DelimitersOnNewline << "\n"; - for (size_t i = 0; i < Lines.size(); ++i) { - llvm::dbgs() << i << " |" << Content[i] << "| " - << "CC=" << ContentColumn[i] << "| " - << "IN=" << (Content[i].data() - Lines[i].data()) << "\n"; - } - }); -} - -void BreakableBlockComment::adjustWhitespace(unsigned LineIndex, - int IndentDelta) { - // When in a preprocessor directive, the trailing backslash in a block comment - // is not needed, but can serve a purpose of uniformity with necessary escaped - // newlines outside the comment. In this case we remove it here before - // trimming the trailing whitespace. The backslash will be re-added later when - // inserting a line break. - size_t EndOfPreviousLine = Lines[LineIndex - 1].size(); - if (InPPDirective && Lines[LineIndex - 1].endswith("\\")) - --EndOfPreviousLine; - - // Calculate the end of the non-whitespace text in the previous line. - EndOfPreviousLine = - Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine); - if (EndOfPreviousLine == StringRef::npos) - EndOfPreviousLine = 0; - else - ++EndOfPreviousLine; - // Calculate the start of the non-whitespace text in the current line. - size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks); - if (StartOfLine == StringRef::npos) - StartOfLine = Lines[LineIndex].rtrim("\r\n").size(); - - StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine); - // Adjust Lines to only contain relevant text. - size_t PreviousContentOffset = - Content[LineIndex - 1].data() - Lines[LineIndex - 1].data(); - Content[LineIndex - 1] = Lines[LineIndex - 1].substr( - PreviousContentOffset, EndOfPreviousLine - PreviousContentOffset); - Content[LineIndex] = Lines[LineIndex].substr(StartOfLine); - - // Adjust the start column uniformly across all lines. - ContentColumn[LineIndex] = - encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) + - IndentDelta; -} - -unsigned BreakableBlockComment::getRangeLength(unsigned LineIndex, - unsigned Offset, - StringRef::size_type Length, - unsigned StartColumn) const { - unsigned LineLength = - encoding::columnWidthWithTabs(Content[LineIndex].substr(Offset, Length), - StartColumn, Style.TabWidth, Encoding); - // FIXME: This should go into getRemainingLength instead, but we currently - // break tests when putting it there. Investigate how to fix those tests. - // The last line gets a "*/" postfix. - if (LineIndex + 1 == Lines.size()) { - LineLength += 2; - // We never need a decoration when breaking just the trailing "*/" postfix. - // Note that checking that Length == 0 is not enough, since Length could - // also be StringRef::npos. - if (Content[LineIndex].substr(Offset, StringRef::npos).empty()) { - LineLength -= Decoration.size(); - } - } - return LineLength; -} - -unsigned BreakableBlockComment::getRemainingLength(unsigned LineIndex, - unsigned Offset, - unsigned StartColumn) const { - return UnbreakableTailLength + - getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn); -} - -unsigned BreakableBlockComment::getContentStartColumn(unsigned LineIndex, - bool Break) const { - if (Break) - return IndentAtLineBreak; - return std::max(0, ContentColumn[LineIndex]); -} - -const llvm::StringSet<> - BreakableBlockComment::ContentIndentingJavadocAnnotations = { - "@param", "@return", "@returns", "@throws", "@type", "@template", - "@see", "@deprecated", "@define", "@exports", "@mods", "@private", -}; - -unsigned BreakableBlockComment::getContentIndent(unsigned LineIndex) const { - if (Style.Language != FormatStyle::LK_Java && - Style.Language != FormatStyle::LK_JavaScript) - return 0; - // The content at LineIndex 0 of a comment like: - // /** line 0 */ - // is "* line 0", so we need to skip over the decoration in that case. - StringRef ContentWithNoDecoration = Content[LineIndex]; - if (LineIndex == 0 && ContentWithNoDecoration.startswith("*")) { - ContentWithNoDecoration = ContentWithNoDecoration.substr(1).ltrim(Blanks); - } - StringRef FirstWord = ContentWithNoDecoration.substr( - 0, ContentWithNoDecoration.find_first_of(Blanks)); - if (ContentIndentingJavadocAnnotations.find(FirstWord) != - ContentIndentingJavadocAnnotations.end()) - return Style.ContinuationIndentWidth; - return 0; -} - -void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, - Split Split, unsigned ContentIndent, - WhitespaceManager &Whitespaces) const { - StringRef Text = Content[LineIndex].substr(TailOffset); - StringRef Prefix = Decoration; - // We need this to account for the case when we have a decoration "* " for all - // the lines except for the last one, where the star in "*/" acts as a - // decoration. - unsigned LocalIndentAtLineBreak = IndentAtLineBreak; - if (LineIndex + 1 == Lines.size() && - Text.size() == Split.first + Split.second) { - // For the last line we need to break before "*/", but not to add "* ". - Prefix = ""; - if (LocalIndentAtLineBreak >= 2) - LocalIndentAtLineBreak -= 2; - } - // The split offset is from the beginning of the line. Convert it to an offset - // from the beginning of the token text. - unsigned BreakOffsetInToken = - Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first; - unsigned CharsToRemove = Split.second; - assert(LocalIndentAtLineBreak >= Prefix.size()); - std::string PrefixWithTrailingIndent = Prefix; - for (unsigned I = 0; I < ContentIndent; ++I) - PrefixWithTrailingIndent += " "; - Whitespaces.replaceWhitespaceInToken( - tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", - PrefixWithTrailingIndent, InPPDirective, /*Newlines=*/1, - /*Spaces=*/LocalIndentAtLineBreak + ContentIndent - - PrefixWithTrailingIndent.size()); -} - -BreakableToken::Split -BreakableBlockComment::getReflowSplit(unsigned LineIndex, - llvm::Regex &CommentPragmasRegex) const { - if (!mayReflow(LineIndex, CommentPragmasRegex)) - return Split(StringRef::npos, 0); - - // If we're reflowing into a line with content indent, only reflow the next - // line if its starting whitespace matches the content indent. - size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks); - if (LineIndex) { - unsigned PreviousContentIndent = getContentIndent(LineIndex - 1); - if (PreviousContentIndent && Trimmed != StringRef::npos && - Trimmed != PreviousContentIndent) - return Split(StringRef::npos, 0); - } - - return Split(0, Trimmed != StringRef::npos ? Trimmed : 0); -} - -bool BreakableBlockComment::introducesBreakBeforeToken() const { - // A break is introduced when we want delimiters on newline. - return DelimitersOnNewline && - Lines[0].substr(1).find_first_not_of(Blanks) != StringRef::npos; -} - -void BreakableBlockComment::reflow(unsigned LineIndex, - WhitespaceManager &Whitespaces) const { - StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks); - // Here we need to reflow. - assert(Tokens[LineIndex - 1] == Tokens[LineIndex] && - "Reflowing whitespace within a token"); - // This is the offset of the end of the last line relative to the start of - // the token text in the token. - unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() + - Content[LineIndex - 1].size() - - tokenAt(LineIndex).TokenText.data(); - unsigned WhitespaceLength = TrimmedContent.data() - - tokenAt(LineIndex).TokenText.data() - - WhitespaceOffsetInToken; - Whitespaces.replaceWhitespaceInToken( - tokenAt(LineIndex), WhitespaceOffsetInToken, - /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"", - /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0, - /*Spaces=*/0); -} - -void BreakableBlockComment::adaptStartOfLine( - unsigned LineIndex, WhitespaceManager &Whitespaces) const { - if (LineIndex == 0) { - if (DelimitersOnNewline) { - // Since we're breaking at index 1 below, the break position and the - // break length are the same. - // Note: this works because getCommentSplit is careful never to split at - // the beginning of a line. - size_t BreakLength = Lines[0].substr(1).find_first_not_of(Blanks); - if (BreakLength != StringRef::npos) - insertBreak(LineIndex, 0, Split(1, BreakLength), /*ContentIndent=*/0, - Whitespaces); - } - return; - } - // Here no reflow with the previous line will happen. - // Fix the decoration of the line at LineIndex. - StringRef Prefix = Decoration; - if (Content[LineIndex].empty()) { - if (LineIndex + 1 == Lines.size()) { - if (!LastLineNeedsDecoration) { - // If the last line was empty, we don't need a prefix, as the */ will - // line up with the decoration (if it exists). - Prefix = ""; - } - } else if (!Decoration.empty()) { - // For other empty lines, if we do have a decoration, adapt it to not - // contain a trailing whitespace. - Prefix = Prefix.substr(0, 1); - } - } else { - if (ContentColumn[LineIndex] == 1) { - // This line starts immediately after the decorating *. - Prefix = Prefix.substr(0, 1); - } - } - // This is the offset of the end of the last line relative to the start of the - // token text in the token. - unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() + - Content[LineIndex - 1].size() - - tokenAt(LineIndex).TokenText.data(); - unsigned WhitespaceLength = Content[LineIndex].data() - - tokenAt(LineIndex).TokenText.data() - - WhitespaceOffsetInToken; - Whitespaces.replaceWhitespaceInToken( - tokenAt(LineIndex), WhitespaceOffsetInToken, WhitespaceLength, "", Prefix, - InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size()); -} - -BreakableToken::Split -BreakableBlockComment::getSplitAfterLastLine(unsigned TailOffset) const { - if (DelimitersOnNewline) { - // Replace the trailing whitespace of the last line with a newline. - // In case the last line is empty, the ending '*/' is already on its own - // line. - StringRef Line = Content.back().substr(TailOffset); - StringRef TrimmedLine = Line.rtrim(Blanks); - if (!TrimmedLine.empty()) - return Split(TrimmedLine.size(), Line.size() - TrimmedLine.size()); - } - return Split(StringRef::npos, 0); -} - -bool BreakableBlockComment::mayReflow(unsigned LineIndex, - llvm::Regex &CommentPragmasRegex) const { - // Content[LineIndex] may exclude the indent after the '*' decoration. In that - // case, we compute the start of the comment pragma manually. - StringRef IndentContent = Content[LineIndex]; - if (Lines[LineIndex].ltrim(Blanks).startswith("*")) { - IndentContent = Lines[LineIndex].ltrim(Blanks).substr(1); - } - return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) && - mayReflowContent(Content[LineIndex]) && !Tok.Finalized && - !switchesFormatting(tokenAt(LineIndex)); -} - -BreakableLineCommentSection::BreakableLineCommentSection( - const FormatToken &Token, unsigned StartColumn, - unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, - encoding::Encoding Encoding, const FormatStyle &Style) - : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) { - assert(Tok.is(TT_LineComment) && - "line comment section must start with a line comment"); - FormatToken *LineTok = nullptr; - for (const FormatToken *CurrentTok = &Tok; - CurrentTok && CurrentTok->is(TT_LineComment); - CurrentTok = CurrentTok->Next) { - LastLineTok = LineTok; - StringRef TokenText(CurrentTok->TokenText); - assert((TokenText.startswith("//") || TokenText.startswith("#")) && - "unsupported line comment prefix, '//' and '#' are supported"); - size_t FirstLineIndex = Lines.size(); - TokenText.split(Lines, "\n"); - Content.resize(Lines.size()); - ContentColumn.resize(Lines.size()); - OriginalContentColumn.resize(Lines.size()); - Tokens.resize(Lines.size()); - Prefix.resize(Lines.size()); - OriginalPrefix.resize(Lines.size()); - for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) { - Lines[i] = Lines[i].ltrim(Blanks); - // We need to trim the blanks in case this is not the first line in a - // multiline comment. Then the indent is included in Lines[i]. - StringRef IndentPrefix = - getLineCommentIndentPrefix(Lines[i].ltrim(Blanks), Style); - assert((TokenText.startswith("//") || TokenText.startswith("#")) && - "unsupported line comment prefix, '//' and '#' are supported"); - OriginalPrefix[i] = Prefix[i] = IndentPrefix; - if (Lines[i].size() > Prefix[i].size() && - isAlphanumeric(Lines[i][Prefix[i].size()])) { - if (Prefix[i] == "//") - Prefix[i] = "// "; - else if (Prefix[i] == "///") - Prefix[i] = "/// "; - else if (Prefix[i] == "//!") - Prefix[i] = "//! "; - else if (Prefix[i] == "///<") - Prefix[i] = "///< "; - else if (Prefix[i] == "//!<") - Prefix[i] = "//!< "; - else if (Prefix[i] == "#" && - Style.Language == FormatStyle::LK_TextProto) - Prefix[i] = "# "; - } - - Tokens[i] = LineTok; - Content[i] = Lines[i].substr(IndentPrefix.size()); - OriginalContentColumn[i] = - StartColumn + encoding::columnWidthWithTabs(OriginalPrefix[i], - StartColumn, - Style.TabWidth, Encoding); - ContentColumn[i] = - StartColumn + encoding::columnWidthWithTabs(Prefix[i], StartColumn, - Style.TabWidth, Encoding); - - // Calculate the end of the non-whitespace text in this line. - size_t EndOfLine = Content[i].find_last_not_of(Blanks); - if (EndOfLine == StringRef::npos) - EndOfLine = Content[i].size(); - else - ++EndOfLine; - Content[i] = Content[i].substr(0, EndOfLine); - } - LineTok = CurrentTok->Next; - if (CurrentTok->Next && !CurrentTok->Next->ContinuesLineCommentSection) { - // A line comment section needs to broken by a line comment that is - // preceded by at least two newlines. Note that we put this break here - // instead of breaking at a previous stage during parsing, since that - // would split the contents of the enum into two unwrapped lines in this - // example, which is undesirable: - // enum A { - // a, // comment about a - // - // // comment about b - // b - // }; - // - // FIXME: Consider putting separate line comment sections as children to - // the unwrapped line instead. - break; - } - } -} - -unsigned -BreakableLineCommentSection::getRangeLength(unsigned LineIndex, unsigned Offset, - StringRef::size_type Length, - unsigned StartColumn) const { - return encoding::columnWidthWithTabs( - Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth, - Encoding); -} - -unsigned BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex, - bool Break) const { - if (Break) - return OriginalContentColumn[LineIndex]; - return ContentColumn[LineIndex]; -} - -void BreakableLineCommentSection::insertBreak( - unsigned LineIndex, unsigned TailOffset, Split Split, - unsigned ContentIndent, WhitespaceManager &Whitespaces) const { - StringRef Text = Content[LineIndex].substr(TailOffset); - // Compute the offset of the split relative to the beginning of the token - // text. - unsigned BreakOffsetInToken = - Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first; - unsigned CharsToRemove = Split.second; - // Compute the size of the new indent, including the size of the new prefix of - // the newly broken line. - unsigned IndentAtLineBreak = OriginalContentColumn[LineIndex] + - Prefix[LineIndex].size() - - OriginalPrefix[LineIndex].size(); - assert(IndentAtLineBreak >= Prefix[LineIndex].size()); - Whitespaces.replaceWhitespaceInToken( - tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", - Prefix[LineIndex], InPPDirective, /*Newlines=*/1, - /*Spaces=*/IndentAtLineBreak - Prefix[LineIndex].size()); -} - -BreakableComment::Split BreakableLineCommentSection::getReflowSplit( - unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const { - if (!mayReflow(LineIndex, CommentPragmasRegex)) - return Split(StringRef::npos, 0); - - size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks); - - // In a line comment section each line is a separate token; thus, after a - // split we replace all whitespace before the current line comment token - // (which does not need to be included in the split), plus the start of the - // line up to where the content starts. - return Split(0, Trimmed != StringRef::npos ? Trimmed : 0); -} - -void BreakableLineCommentSection::reflow(unsigned LineIndex, - WhitespaceManager &Whitespaces) const { - if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) { - // Reflow happens between tokens. Replace the whitespace between the - // tokens by the empty string. - Whitespaces.replaceWhitespace( - *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0, - /*StartOfTokenColumn=*/StartColumn, /*InPPDirective=*/false); - } else if (LineIndex > 0) { - // In case we're reflowing after the '\' in: - // - // // line comment \ - // // line 2 - // - // the reflow happens inside the single comment token (it is a single line - // comment with an unescaped newline). - // Replace the whitespace between the '\' and '//' with the empty string. - // - // Offset points to after the '\' relative to start of the token. - unsigned Offset = Lines[LineIndex - 1].data() + - Lines[LineIndex - 1].size() - - tokenAt(LineIndex - 1).TokenText.data(); - // WhitespaceLength is the number of chars between the '\' and the '//' on - // the next line. - unsigned WhitespaceLength = - Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data() - Offset; - Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], - Offset, - /*ReplaceChars=*/WhitespaceLength, - /*PreviousPostfix=*/"", - /*CurrentPrefix=*/"", - /*InPPDirective=*/false, - /*Newlines=*/0, - /*Spaces=*/0); - - } - // Replace the indent and prefix of the token with the reflow prefix. - unsigned Offset = - Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data(); - unsigned WhitespaceLength = - Content[LineIndex].data() - Lines[LineIndex].data(); - Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], - Offset, - /*ReplaceChars=*/WhitespaceLength, - /*PreviousPostfix=*/"", - /*CurrentPrefix=*/ReflowPrefix, - /*InPPDirective=*/false, - /*Newlines=*/0, - /*Spaces=*/0); -} - -void BreakableLineCommentSection::adaptStartOfLine( - unsigned LineIndex, WhitespaceManager &Whitespaces) const { - // If this is the first line of a token, we need to inform Whitespace Manager - // about it: either adapt the whitespace range preceding it, or mark it as an - // untouchable token. - // This happens for instance here: - // // line 1 \ - // // line 2 - if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) { - // This is the first line for the current token, but no reflow with the - // previous token is necessary. However, we still may need to adjust the - // start column. Note that ContentColumn[LineIndex] is the expected - // content column after a possible update to the prefix, hence the prefix - // length change is included. - unsigned LineColumn = - ContentColumn[LineIndex] - - (Content[LineIndex].data() - Lines[LineIndex].data()) + - (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size()); - - // We always want to create a replacement instead of adding an untouchable - // token, even if LineColumn is the same as the original column of the - // token. This is because WhitespaceManager doesn't align trailing - // comments if they are untouchable. - Whitespaces.replaceWhitespace(*Tokens[LineIndex], - /*Newlines=*/1, - /*Spaces=*/LineColumn, - /*StartOfTokenColumn=*/LineColumn, - /*InPPDirective=*/false); - } - if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) { - // Adjust the prefix if necessary. - - // Take care of the space possibly introduced after a decoration. - assert(Prefix[LineIndex] == (OriginalPrefix[LineIndex] + " ").str() && - "Expecting a line comment prefix to differ from original by at most " - "a space"); - Whitespaces.replaceWhitespaceInToken( - tokenAt(LineIndex), OriginalPrefix[LineIndex].size(), 0, "", "", - /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1); - } -} - -void BreakableLineCommentSection::updateNextToken(LineState &State) const { - if (LastLineTok) { - State.NextToken = LastLineTok->Next; - } -} - -bool BreakableLineCommentSection::mayReflow( - unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const { - // Line comments have the indent as part of the prefix, so we need to - // recompute the start of the line. - StringRef IndentContent = Content[LineIndex]; - if (Lines[LineIndex].startswith("//")) { - IndentContent = Lines[LineIndex].substr(2); - } - // FIXME: Decide whether we want to reflow non-regular indents: - // Currently, we only reflow when the OriginalPrefix[LineIndex] matches the - // OriginalPrefix[LineIndex-1]. That means we don't reflow - // // text that protrudes - // // into text with different indent - // We do reflow in that case in block comments. - return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) && - mayReflowContent(Content[LineIndex]) && !Tok.Finalized && - !switchesFormatting(tokenAt(LineIndex)) && - OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1]; -} - -} // namespace format -} // namespace clang diff --git a/gnu/llvm/tools/clang/lib/Format/BreakableToken.h b/gnu/llvm/tools/clang/lib/Format/BreakableToken.h deleted file mode 100644 index 10e18017802..00000000000 --- a/gnu/llvm/tools/clang/lib/Format/BreakableToken.h +++ /dev/null @@ -1,493 +0,0 @@ -//===--- BreakableToken.h - Format C++ code ---------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Declares BreakableToken, BreakableStringLiteral, BreakableComment, -/// BreakableBlockComment and BreakableLineCommentSection classes, that contain -/// token type-specific logic to break long lines in tokens and reflow content -/// between tokens. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LIB_FORMAT_BREAKABLETOKEN_H -#define LLVM_CLANG_LIB_FORMAT_BREAKABLETOKEN_H - -#include "Encoding.h" -#include "TokenAnnotator.h" -#include "WhitespaceManager.h" -#include "llvm/ADT/StringSet.h" -#include "llvm/Support/Regex.h" -#include <utility> - -namespace clang { -namespace format { - -/// Checks if \p Token switches formatting, like /* clang-format off */. -/// \p Token must be a comment. -bool switchesFormatting(const FormatToken &Token); - -struct FormatStyle; - -/// Base class for tokens / ranges of tokens that can allow breaking -/// within the tokens - for example, to avoid whitespace beyond the column -/// limit, or to reflow text. -/// -/// Generally, a breakable token consists of logical lines, addressed by a line -/// index. For example, in a sequence of line comments, each line comment is its -/// own logical line; similarly, for a block comment, each line in the block -/// comment is on its own logical line. -/// -/// There are two methods to compute the layout of the token: -/// - getRangeLength measures the number of columns needed for a range of text -/// within a logical line, and -/// - getContentStartColumn returns the start column at which we want the -/// content of a logical line to start (potentially after introducing a line -/// break). -/// -/// The mechanism to adapt the layout of the breakable token is organised -/// around the concept of a \c Split, which is a whitespace range that signifies -/// a position of the content of a token where a reformatting might be done. -/// -/// Operating with splits is divided into two operations: -/// - getSplit, for finding a split starting at a position, -/// - insertBreak, for executing the split using a whitespace manager. -/// -/// There is a pair of operations that are used to compress a long whitespace -/// range with a single space if that will bring the line length under the -/// column limit: -/// - getLineLengthAfterCompression, for calculating the size in columns of the -/// line after a whitespace range has been compressed, and -/// - compressWhitespace, for executing the whitespace compression using a -/// whitespace manager; note that the compressed whitespace may be in the -/// middle of the original line and of the reformatted line. -/// -/// For tokens where the whitespace before each line needs to be also -/// reformatted, for example for tokens supporting reflow, there are analogous -/// operations that might be executed before the main line breaking occurs: -/// - getReflowSplit, for finding a split such that the content preceding it -/// needs to be specially reflown, -/// - reflow, for executing the split using a whitespace manager, -/// - introducesBreakBefore, for checking if reformatting the beginning -/// of the content introduces a line break before it, -/// - adaptStartOfLine, for executing the reflow using a whitespace -/// manager. -/// -/// For tokens that require the whitespace after the last line to be -/// reformatted, for example in multiline jsdoc comments that require the -/// trailing '*/' to be on a line of itself, there are analogous operations -/// that might be executed after the last line has been reformatted: -/// - getSplitAfterLastLine, for finding a split after the last line that needs -/// to be reflown, -/// - replaceWhitespaceAfterLastLine, for executing the reflow using a -/// whitespace manager. -/// -class BreakableToken { -public: - /// Contains starting character index and length of split. - typedef std::pair<StringRef::size_type, unsigned> Split; - - virtual ~BreakableToken() {} - - /// Returns the number of lines in this token in the original code. - virtual unsigned getLineCount() const = 0; - - /// Returns the number of columns required to format the text in the - /// byte range [\p Offset, \p Offset \c + \p Length). - /// - /// \p Offset is the byte offset from the start of the content of the line - /// at \p LineIndex. - /// - /// \p StartColumn is the column at which the text starts in the formatted - /// file, needed to compute tab stops correctly. - virtual unsigned getRangeLength(unsigned LineIndex, unsigned Offset, - StringRef::size_type Length, - unsigned StartColumn) const = 0; - - /// Returns the number of columns required to format the text following - /// the byte \p Offset in the line \p LineIndex, including potentially - /// unbreakable sequences of tokens following after the end of the token. - /// - /// \p Offset is the byte offset from the start of the content of the line - /// at \p LineIndex. - /// - /// \p StartColumn is the column at which the text starts in the formatted - /// file, needed to compute tab stops correctly. - /// - /// For breakable tokens that never use extra space at the end of a line, this - /// is equivalent to getRangeLength with a Length of StringRef::npos. - virtual unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, - unsigned StartColumn) const { - return getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn); - } - - /// Returns the column at which content in line \p LineIndex starts, - /// assuming no reflow. - /// - /// If \p Break is true, returns the column at which the line should start - /// after the line break. - /// If \p Break is false, returns the column at which the line itself will - /// start. - virtual unsigned getContentStartColumn(unsigned LineIndex, - bool Break) const = 0; - - /// Returns additional content indent required for the second line after the - /// content at line \p LineIndex is broken. - /// - // (Next lines do not start with `///` since otherwise -Wdocumentation picks - // up the example annotations and generates warnings for them) - // For example, Javadoc @param annotations require and indent of 4 spaces and - // in this example getContentIndex(1) returns 4. - // /** - // * @param loooooooooooooong line - // * continuation - // */ - virtual unsigned getContentIndent(unsigned LineIndex) const { - return 0; - } - - /// Returns a range (offset, length) at which to break the line at - /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not - /// violate \p ColumnLimit, assuming the text starting at \p TailOffset in - /// the token is formatted starting at ContentStartColumn in the reformatted - /// file. - virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, - unsigned ColumnLimit, unsigned ContentStartColumn, - llvm::Regex &CommentPragmasRegex) const = 0; - - /// Emits the previously retrieved \p Split via \p Whitespaces. - virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - unsigned ContentIndent, - WhitespaceManager &Whitespaces) const = 0; - - /// Returns the number of columns needed to format - /// \p RemainingTokenColumns, assuming that Split is within the range measured - /// by \p RemainingTokenColumns, and that the whitespace in Split is reduced - /// to a single space. - unsigned getLengthAfterCompression(unsigned RemainingTokenColumns, - Split Split) const; - - /// Replaces the whitespace range described by \p Split with a single - /// space. - virtual void compressWhitespace(unsigned LineIndex, unsigned TailOffset, - Split Split, - WhitespaceManager &Whitespaces) const = 0; - - /// Returns whether the token supports reflowing text. - virtual bool supportsReflow() const { return false; } - - /// Returns a whitespace range (offset, length) of the content at \p - /// LineIndex such that the content of that line is reflown to the end of the - /// previous one. - /// - /// Returning (StringRef::npos, 0) indicates reflowing is not possible. - /// - /// The range will include any whitespace preceding the specified line's - /// content. - /// - /// If the split is not contained within one token, for example when reflowing - /// line comments, returns (0, <length>). - virtual Split getReflowSplit(unsigned LineIndex, - llvm::Regex &CommentPragmasRegex) const { - return Split(StringRef::npos, 0); - } - - /// Reflows the current line into the end of the previous one. - virtual void reflow(unsigned LineIndex, - WhitespaceManager &Whitespaces) const {} - - /// Returns whether there will be a line break at the start of the - /// token. - virtual bool introducesBreakBeforeToken() const { - return false; - } - - /// Replaces the whitespace between \p LineIndex-1 and \p LineIndex. - virtual void adaptStartOfLine(unsigned LineIndex, - WhitespaceManager &Whitespaces) const {} - - /// Returns a whitespace range (offset, length) of the content at - /// the last line that needs to be reformatted after the last line has been - /// reformatted. - /// - /// A result having offset == StringRef::npos means that no reformat is - /// necessary. - virtual Split getSplitAfterLastLine(unsigned TailOffset) const { - return Split(StringRef::npos, 0); - } - - /// Replaces the whitespace from \p SplitAfterLastLine on the last line - /// after the last line has been formatted by performing a reformatting. - void replaceWhitespaceAfterLastLine(unsigned TailOffset, - Split SplitAfterLastLine, - WhitespaceManager &Whitespaces) const { - insertBreak(getLineCount() - 1, TailOffset, SplitAfterLastLine, - /*ContentIndent=*/0, Whitespaces); - } - - /// Updates the next token of \p State to the next token after this - /// one. This can be used when this token manages a set of underlying tokens - /// as a unit and is responsible for the formatting of the them. - virtual void updateNextToken(LineState &State) const {} - -protected: - BreakableToken(const FormatToken &Tok, bool InPPDirective, - encoding::Encoding Encoding, const FormatStyle &Style) - : Tok(Tok), InPPDirective(InPPDirective), Encoding(Encoding), - Style(Style) {} - - const FormatToken &Tok; - const bool InPPDirective; - const encoding::Encoding Encoding; - const FormatStyle &Style; -}; - -class BreakableStringLiteral : public BreakableToken { -public: - /// Creates a breakable token for a single line string literal. - /// - /// \p StartColumn specifies the column in which the token will start - /// after formatting. - BreakableStringLiteral(const FormatToken &Tok, unsigned StartColumn, - StringRef Prefix, StringRef Postfix, - unsigned UnbreakableTailLength, bool InPPDirective, - encoding::Encoding Encoding, const FormatStyle &Style); - - Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, - unsigned ContentStartColumn, - llvm::Regex &CommentPragmasRegex) const override; - void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - unsigned ContentIndent, - WhitespaceManager &Whitespaces) const override; - void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) const override {} - unsigned getLineCount() const override; - unsigned getRangeLength(unsigned LineIndex, unsigned Offset, - StringRef::size_type Length, - unsigned StartColumn) const override; - unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, - unsigned StartColumn) const override; - unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override; - -protected: - // The column in which the token starts. - unsigned StartColumn; - // The prefix a line needs after a break in the token. - StringRef Prefix; - // The postfix a line needs before introducing a break. - StringRef Postfix; - // The token text excluding the prefix and postfix. - StringRef Line; - // Length of the sequence of tokens after this string literal that cannot - // contain line breaks. - unsigned UnbreakableTailLength; -}; - -class BreakableComment : public BreakableToken { -protected: - /// Creates a breakable token for a comment. - /// - /// \p StartColumn specifies the column in which the comment will start after - /// formatting. - BreakableComment(const FormatToken &Token, unsigned StartColumn, - bool InPPDirective, encoding::Encoding Encoding, - const FormatStyle &Style); - -public: - bool supportsReflow() const override { return true; } - unsigned getLineCount() const override; - Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, - unsigned ContentStartColumn, - llvm::Regex &CommentPragmasRegex) const override; - void compressWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, - WhitespaceManager &Whitespaces) const override; - -protected: - // Returns the token containing the line at LineIndex. - const FormatToken &tokenAt(unsigned LineIndex) const; - - // Checks if the content of line LineIndex may be reflown with the previous - // line. - virtual bool mayReflow(unsigned LineIndex, - llvm::Regex &CommentPragmasRegex) const = 0; - - // Contains the original text of the lines of the block comment. - // - // In case of a block comments, excludes the leading /* in the first line and - // trailing */ in the last line. In case of line comments, excludes the - // leading // and spaces. - SmallVector<StringRef, 16> Lines; - - // Contains the text of the lines excluding all leading and trailing - // whitespace between the lines. Note that the decoration (if present) is also - // not considered part of the text. - SmallVector<StringRef, 16> Content; - - // Tokens[i] contains a reference to the token containing Lines[i] if the - // whitespace range before that token is managed by this block. - // Otherwise, Tokens[i] is a null pointer. - SmallVector<FormatToken *, 16> Tokens; - - // ContentColumn[i] is the target column at which Content[i] should be. - // Note that this excludes a leading "* " or "*" in case of block comments - // where all lines have a "*" prefix, or the leading "// " or "//" in case of - // line comments. - // - // In block comments, the first line's target column is always positive. The - // remaining lines' target columns are relative to the first line to allow - // correct indentation of comments in \c WhitespaceManager. Thus they can be - // negative as well (in case the first line needs to be unindented more than - // there's actual whitespace in another line). - SmallVector<int, 16> ContentColumn; - - // The intended start column of the first line of text from this section. - unsigned StartColumn; - - // The prefix to use in front a line that has been reflown up. - // For example, when reflowing the second line after the first here: - // // comment 1 - // // comment 2 - // we expect: - // // comment 1 comment 2 - // and not: - // // comment 1comment 2 - StringRef ReflowPrefix = " "; -}; - -class BreakableBlockComment : public BreakableComment { -public: - BreakableBlockComment(const FormatToken &Token, unsigned StartColumn, - unsigned OriginalStartColumn, bool FirstInLine, - bool InPPDirective, encoding::Encoding Encoding, - const FormatStyle &Style); - - unsigned getRangeLength(unsigned LineIndex, unsigned Offset, - StringRef::size_type Length, - unsigned StartColumn) const override; - unsigned getRemainingLength(unsigned LineIndex, unsigned Offset, - unsigned StartColumn) const override; - unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override; - unsigned getContentIndent(unsigned LineIndex) const override; - void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - unsigned ContentIndent, - WhitespaceManager &Whitespaces) const override; - Split getReflowSplit(unsigned LineIndex, - llvm::Regex &CommentPragmasRegex) const override; - void reflow(unsigned LineIndex, - WhitespaceManager &Whitespaces) const override; - bool introducesBreakBeforeToken() const override; - void adaptStartOfLine(unsigned LineIndex, - WhitespaceManager &Whitespaces) const override; - Split getSplitAfterLastLine(unsigned TailOffset) const override; - - bool mayReflow(unsigned LineIndex, - llvm::Regex &CommentPragmasRegex) const override; - - // Contains Javadoc annotations that require additional indent when continued - // on multiple lines. - static const llvm::StringSet<> ContentIndentingJavadocAnnotations; - -private: - // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex]. - // - // Updates Content[LineIndex-1] and Content[LineIndex] by stripping off - // leading and trailing whitespace. - // - // Sets ContentColumn to the intended column in which the text at - // Lines[LineIndex] starts (note that the decoration, if present, is not - // considered part of the text). - void adjustWhitespace(unsigned LineIndex, int IndentDelta); - - // The column at which the text of a broken line should start. - // Note that an optional decoration would go before that column. - // IndentAtLineBreak is a uniform position for all lines in a block comment, - // regardless of their relative position. - // FIXME: Revisit the decision to do this; the main reason was to support - // patterns like - // /**************//** - // * Comment - // We could also support such patterns by special casing the first line - // instead. - unsigned IndentAtLineBreak; - - // This is to distinguish between the case when the last line was empty and - // the case when it started with a decoration ("*" or "* "). - bool LastLineNeedsDecoration; - - // Either "* " if all lines begin with a "*", or empty. - StringRef Decoration; - - // If this block comment has decorations, this is the column of the start of - // the decorations. - unsigned DecorationColumn; - - // If true, make sure that the opening '/**' and the closing '*/' ends on a - // line of itself. Styles like jsdoc require this for multiline comments. - bool DelimitersOnNewline; - - // Length of the sequence of tokens after this string literal that cannot - // contain line breaks. - unsigned UnbreakableTailLength; -}; - -class BreakableLineCommentSection : public BreakableComment { -public: - BreakableLineCommentSection(const FormatToken &Token, unsigned StartColumn, - unsigned OriginalStartColumn, bool FirstInLine, - bool InPPDirective, encoding::Encoding Encoding, - const FormatStyle &Style); - - unsigned getRangeLength(unsigned LineIndex, unsigned Offset, - StringRef::size_type Length, - unsigned StartColumn) const override; - unsigned getContentStartColumn(unsigned LineIndex, bool Break) const override; - void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, - unsigned ContentIndent, - WhitespaceManager &Whitespaces) const override; - Split getReflowSplit(unsigned LineIndex, - llvm::Regex &CommentPragmasRegex) const override; - void reflow(unsigned LineIndex, - WhitespaceManager &Whitespaces) const override; - void adaptStartOfLine(unsigned LineIndex, - WhitespaceManager &Whitespaces) const override; - void updateNextToken(LineState &State) const override; - bool mayReflow(unsigned LineIndex, - llvm::Regex &CommentPragmasRegex) const override; - -private: - // OriginalPrefix[i] contains the original prefix of line i, including - // trailing whitespace before the start of the content. The indentation - // preceding the prefix is not included. - // For example, if the line is: - // // content - // then the original prefix is "// ". - SmallVector<StringRef, 16> OriginalPrefix; - - // Prefix[i] contains the intended leading "//" with trailing spaces to - // account for the indentation of content within the comment at line i after - // formatting. It can be different than the original prefix when the original - // line starts like this: - // //content - // Then the original prefix is "//", but the prefix is "// ". - SmallVector<StringRef, 16> Prefix; - - SmallVector<unsigned, 16> OriginalContentColumn; - - /// The token to which the last line of this breakable token belongs - /// to; nullptr if that token is the initial token. - /// - /// The distinction is because if the token of the last line of this breakable - /// token is distinct from the initial token, this breakable token owns the - /// whitespace before the token of the last line, and the whitespace manager - /// must be able to modify it. - FormatToken *LastLineTok = nullptr; -}; -} // namespace format -} // namespace clang - -#endif diff --git a/gnu/llvm/tools/clang/lib/Format/CMakeLists.txt b/gnu/llvm/tools/clang/lib/Format/CMakeLists.txt deleted file mode 100644 index 0019d045cd0..00000000000 --- a/gnu/llvm/tools/clang/lib/Format/CMakeLists.txt +++ /dev/null @@ -1,24 +0,0 @@ -set(LLVM_LINK_COMPONENTS support) - -add_clang_library(clangFormat - AffectedRangeManager.cpp - BreakableToken.cpp - ContinuationIndenter.cpp - Format.cpp - FormatToken.cpp - FormatTokenLexer.cpp - NamespaceEndCommentsFixer.cpp - SortJavaScriptImports.cpp - TokenAnalyzer.cpp - TokenAnnotator.cpp - UnwrappedLineFormatter.cpp - UnwrappedLineParser.cpp - UsingDeclarationsSorter.cpp - WhitespaceManager.cpp - - LINK_LIBS - clangBasic - clangLex - clangToolingCore - clangToolingInclusions - ) diff --git a/gnu/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp b/gnu/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp deleted file mode 100644 index c369b94b998..00000000000 --- a/gnu/llvm/tools/clang/lib/Format/ContinuationIndenter.cpp +++ /dev/null @@ -1,2191 +0,0 @@ -//===--- ContinuationIndenter.cpp - Format C++ code -----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file implements the continuation indenter. -/// -//===----------------------------------------------------------------------===// - -#include "ContinuationIndenter.h" -#include "BreakableToken.h" -#include "FormatInternal.h" -#include "WhitespaceManager.h" -#include "clang/Basic/OperatorPrecedence.h" -#include "clang/Basic/SourceManager.h" -#include "clang/Format/Format.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "format-indenter" - -namespace clang { -namespace format { - -// Returns true if a TT_SelectorName should be indented when wrapped, -// false otherwise. -static bool shouldIndentWrappedSelectorName(const FormatStyle &Style, - LineType LineType) { - return Style.IndentWrappedFunctionNames || LineType == LT_ObjCMethodDecl; -} - -// Returns the length of everything up to the first possible line break after -// the ), ], } or > matching \c Tok. -static unsigned getLengthToMatchingParen(const FormatToken &Tok, - const std::vector<ParenState> &Stack) { - // Normally whether or not a break before T is possible is calculated and - // stored in T.CanBreakBefore. Braces, array initializers and text proto - // messages like `key: < ... >` are an exception: a break is possible - // before a closing brace R if a break was inserted after the corresponding - // opening brace. The information about whether or not a break is needed - // before a closing brace R is stored in the ParenState field - // S.BreakBeforeClosingBrace where S is the state that R closes. - // - // In order to decide whether there can be a break before encountered right - // braces, this implementation iterates over the sequence of tokens and over - // the paren stack in lockstep, keeping track of the stack level which visited - // right braces correspond to in MatchingStackIndex. - // - // For example, consider: - // L. <- line number - // 1. { - // 2. {1}, - // 3. {2}, - // 4. {{3}}} - // ^ where we call this method with this token. - // The paren stack at this point contains 3 brace levels: - // 0. { at line 1, BreakBeforeClosingBrace: true - // 1. first { at line 4, BreakBeforeClosingBrace: false - // 2. second { at line 4, BreakBeforeClosingBrace: false, - // where there might be fake parens levels in-between these levels. - // The algorithm will start at the first } on line 4, which is the matching - // brace of the initial left brace and at level 2 of the stack. Then, - // examining BreakBeforeClosingBrace: false at level 2, it will continue to - // the second } on line 4, and will traverse the stack downwards until it - // finds the matching { on level 1. Then, examining BreakBeforeClosingBrace: - // false at level 1, it will continue to the third } on line 4 and will - // traverse the stack downwards until it finds the matching { on level 0. - // Then, examining BreakBeforeClosingBrace: true at level 0, the algorithm - // will stop and will use the second } on line 4 to determine the length to - // return, as in this example the range will include the tokens: {3}} - // - // The algorithm will only traverse the stack if it encounters braces, array - // initializer squares or text proto angle brackets. - if (!Tok.MatchingParen) - return 0; - FormatToken *End = Tok.MatchingParen; - // Maintains a stack level corresponding to the current End token. - int MatchingStackIndex = Stack.size() - 1; - // Traverses the stack downwards, looking for the level to which LBrace - // corresponds. Returns either a pointer to the matching level or nullptr if - // LParen is not found in the initial portion of the stack up to - // MatchingStackIndex. - auto FindParenState = [&](const FormatToken *LBrace) -> const ParenState * { - while (MatchingStackIndex >= 0 && Stack[MatchingStackIndex].Tok != LBrace) - --MatchingStackIndex; - return MatchingStackIndex >= 0 ? &Stack[MatchingStackIndex] : nullptr; - }; - for (; End->Next; End = End->Next) { - if (End->Next->CanBreakBefore) - break; - if (!End->Next->closesScope()) - continue; - if (End->Next->MatchingParen && - End->Next->MatchingParen->isOneOf( - tok::l_brace, TT_ArrayInitializerLSquare, tok::less)) { - const ParenState *State = FindParenState(End->Next->MatchingParen); - if (State && State->BreakBeforeClosingBrace) - break; - } - } - return End->TotalLength - Tok.TotalLength + 1; -} - -static unsigned getLengthToNextOperator(const FormatToken &Tok) { - if (!Tok.NextOperator) - return 0; - return Tok.NextOperator->TotalLength - Tok.TotalLength; -} - -// Returns \c true if \c Tok is the "." or "->" of a call and starts the next -// segment of a builder type call. -static bool startsSegmentOfBuilderTypeCall(const FormatToken &Tok) { - return Tok.isMemberAccess() && Tok.Previous && Tok.Previous->closesScope(); -} - -// Returns \c true if \c Current starts a new parameter. -static bool startsNextParameter(const FormatToken &Current, - const FormatStyle &Style) { - const FormatToken &Previous = *Current.Previous; - if (Current.is(TT_CtorInitializerComma) && - Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) - return true; - if (Style.Language == FormatStyle::LK_Proto && Current.is(TT_SelectorName)) - return true; - return Previous.is(tok::comma) && !Current.isTrailingComment() && - ((Previous.isNot(TT_CtorInitializerComma) || - Style.BreakConstructorInitializers != - FormatStyle::BCIS_BeforeComma) && - (Previous.isNot(TT_InheritanceComma) || - Style.BreakInheritanceList != FormatStyle::BILS_BeforeComma)); -} - -static bool opensProtoMessageField(const FormatToken &LessTok, - const FormatStyle &Style) { - if (LessTok.isNot(tok::less)) - return false; - return Style.Language == FormatStyle::LK_TextProto || - (Style.Language == FormatStyle::LK_Proto && - (LessTok.NestingLevel > 0 || - (LessTok.Previous && LessTok.Previous->is(tok::equal)))); -} - -// Returns the delimiter of a raw string literal, or None if TokenText is not -// the text of a raw string literal. The delimiter could be the empty string. -// For example, the delimiter of R"deli(cont)deli" is deli. -static llvm::Optional<StringRef> getRawStringDelimiter(StringRef TokenText) { - if (TokenText.size() < 5 // The smallest raw string possible is 'R"()"'. - || !TokenText.startswith("R\"") || !TokenText.endswith("\"")) - return None; - - // A raw string starts with 'R"<delimiter>(' and delimiter is ascii and has - // size at most 16 by the standard, so the first '(' must be among the first - // 19 bytes. - size_t LParenPos = TokenText.substr(0, 19).find_first_of('('); - if (LParenPos == StringRef::npos) - return None; - StringRef Delimiter = TokenText.substr(2, LParenPos - 2); - - // Check that the string ends in ')Delimiter"'. - size_t RParenPos = TokenText.size() - Delimiter.size() - 2; - if (TokenText[RParenPos] != ')') - return None; - if (!TokenText.substr(RParenPos + 1).startswith(Delimiter)) - return None; - return Delimiter; -} - -// Returns the canonical delimiter for \p Language, or the empty string if no -// canonical delimiter is specified. -static StringRef -getCanonicalRawStringDelimiter(const FormatStyle &Style, - FormatStyle::LanguageKind Language) { - for (const auto &Format : Style.RawStringFormats) { - if (Format.Language == Language) - return StringRef(Format.CanonicalDelimiter); - } - return ""; -} - -RawStringFormatStyleManager::RawStringFormatStyleManager( - const FormatStyle &CodeStyle) { - for (const auto &RawStringFormat : CodeStyle.RawStringFormats) { - llvm::Optional<FormatStyle> LanguageStyle = - CodeStyle.GetLanguageStyle(RawStringFormat.Language); - if (!LanguageStyle) { - FormatStyle PredefinedStyle; - if (!getPredefinedStyle(RawStringFormat.BasedOnStyle, - RawStringFormat.Language, &PredefinedStyle)) { - PredefinedStyle = getLLVMStyle(); - PredefinedStyle.Language = RawStringFormat.Language; - } - LanguageStyle = PredefinedStyle; - } - LanguageStyle->ColumnLimit = CodeStyle.ColumnLimit; - for (StringRef Delimiter : RawStringFormat.Delimiters) { - DelimiterStyle.insert({Delimiter, *LanguageStyle}); - } - for (StringRef EnclosingFunction : RawStringFormat.EnclosingFunctions) { - EnclosingFunctionStyle.insert({EnclosingFunction, *LanguageStyle}); - } - } -} - -llvm::Optional<FormatStyle> -RawStringFormatStyleManager::getDelimiterStyle(StringRef Delimiter) const { - auto It = DelimiterStyle.find(Delimiter); - if (It == DelimiterStyle.end()) - return None; - return It->second; -} - -llvm::Optional<FormatStyle> -RawStringFormatStyleManager::getEnclosingFunctionStyle( - StringRef EnclosingFunction) const { - auto It = EnclosingFunctionStyle.find(EnclosingFunction); - if (It == EnclosingFunctionStyle.end()) - return None; - return It->second; -} - -ContinuationIndenter::ContinuationIndenter(const FormatStyle &Style, - const AdditionalKeywords &Keywords, - const SourceManager &SourceMgr, - WhitespaceManager &Whitespaces, - encoding::Encoding Encoding, - bool BinPackInconclusiveFunctions) - : Style(Style), Keywords(Keywords), SourceMgr(SourceMgr), - Whitespaces(Whitespaces), Encoding(Encoding), - BinPackInconclusiveFunctions(BinPackInconclusiveFunctions), - CommentPragmasRegex(Style.CommentPragmas), RawStringFormats(Style) {} - -LineState ContinuationIndenter::getInitialState(unsigned FirstIndent, - unsigned FirstStartColumn, - const AnnotatedLine *Line, - bool DryRun) { - LineState State; - State.FirstIndent = FirstIndent; - if (FirstStartColumn && Line->First->NewlinesBefore == 0) - State.Column = FirstStartColumn; - else - State.Column = FirstIndent; - // With preprocessor directive indentation, the line starts on column 0 - // since it's indented after the hash, but FirstIndent is set to the - // preprocessor indent. - if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash && - (Line->Type == LT_PreprocessorDirective || - Line->Type == LT_ImportStatement)) - State.Column = 0; - State.Line = Line; - State.NextToken = Line->First; - State.Stack.push_back(ParenState(/*Tok=*/nullptr, FirstIndent, FirstIndent, - /*AvoidBinPacking=*/false, - /*NoLineBreak=*/false)); - State.LineContainsContinuedForLoopSection = false; - State.NoContinuation = false; - State.StartOfStringLiteral = 0; - State.StartOfLineLevel = 0; - State.LowestLevelOnLine = 0; - State.IgnoreStackForComparison = false; - - if (Style.Language == FormatStyle::LK_TextProto) { - // We need this in order to deal with the bin packing of text fields at - // global scope. - State.Stack.back().AvoidBinPacking = true; - State.Stack.back().BreakBeforeParameter = true; - State.Stack.back().AlignColons = false; - } - - // The first token has already been indented and thus consumed. - moveStateToNextToken(State, DryRun, /*Newline=*/false); - return State; -} - -bool ContinuationIndenter::canBreak(const LineState &State) { - const FormatToken &Current = *State.NextToken; - const FormatToken &Previous = *Current.Previous; - assert(&Previous == Current.Previous); - if (!Current.CanBreakBefore && !(State.Stack.back().BreakBeforeClosingBrace && - Current.closesBlockOrBlockTypeList(Style))) - return false; - // The opening "{" of a braced list has to be on the same line as the first - // element if it is nested in another braced init list or function call. - if (!Current.MustBreakBefore && Previous.is(tok::l_brace) && - Previous.isNot(TT_DictLiteral) && Previous.BlockKind == BK_BracedInit && - Previous.Previous && - Previous.Previous->isOneOf(tok::l_brace, tok::l_paren, tok::comma)) - return false; - // This prevents breaks like: - // ... - // SomeParameter, OtherParameter).DoSomething( - // ... - // As they hide "DoSomething" and are generally bad for readability. - if (Previous.opensScope() && Previous.isNot(tok::l_brace) && - State.LowestLevelOnLine < State.StartOfLineLevel && - State.LowestLevelOnLine < Current.NestingLevel) - return false; - if (Current.isMemberAccess() && State.Stack.back().ContainsUnwrappedBuilder) - return false; - - // Don't create a 'hanging' indent if there are multiple blocks in a single - // statement. - if (Previous.is(tok::l_brace) && State.Stack.size() > 1 && - State.Stack[State.Stack.size() - 2].NestedBlockInlined && - State.Stack[State.Stack.size() - 2].HasMultipleNestedBlocks) - return false; - - // Don't break after very short return types (e.g. "void") as that is often - // unexpected. - if (Current.is(TT_FunctionDeclarationName) && State.Column < 6) { - if (Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_None) - return false; - } - - // If binary operators are moved to the next line (including commas for some - // styles of constructor initializers), that's always ok. - if (!Current.isOneOf(TT_BinaryOperator, tok::comma) && - State.Stack.back().NoLineBreakInOperand) - return false; - - if (Previous.is(tok::l_square) && Previous.is(TT_ObjCMethodExpr)) - return false; - - return !State.Stack.back().NoLineBreak; -} - -bool ContinuationIndenter::mustBreak(const LineState &State) { - const FormatToken &Current = *State.NextToken; - const FormatToken &Previous = *Current.Previous; - if (Current.MustBreakBefore || Current.is(TT_InlineASMColon)) - return true; - if (State.Stack.back().BreakBeforeClosingBrace && - Current.closesBlockOrBlockTypeList(Style)) - return true; - if (Previous.is(tok::semi) && State.LineContainsContinuedForLoopSection) - return true; - if (Style.Language == FormatStyle::LK_ObjC && - Current.ObjCSelectorNameParts > 1 && - Current.startsSequence(TT_SelectorName, tok::colon, tok::caret)) { - return true; - } - if ((startsNextParameter(Current, Style) || Previous.is(tok::semi) || - (Previous.is(TT_TemplateCloser) && Current.is(TT_StartOfName) && - Style.isCpp() && - // FIXME: This is a temporary workaround for the case where clang-format - // sets BreakBeforeParameter to avoid bin packing and this creates a - // completely unnecessary line break after a template type that isn't - // line-wrapped. - (Previous.NestingLevel == 1 || Style.BinPackParameters)) || - (Style.BreakBeforeTernaryOperators && Current.is(TT_ConditionalExpr) && - Previous.isNot(tok::question)) || - (!Style.BreakBeforeTernaryOperators && - Previous.is(TT_ConditionalExpr))) && - State.Stack.back().BreakBeforeParameter && !Current.isTrailingComment() && - !Current.isOneOf(tok::r_paren, tok::r_brace)) - return true; - if (((Previous.is(TT_DictLiteral) && Previous.is(tok::l_brace)) || - (Previous.is(TT_ArrayInitializerLSquare) && - Previous.ParameterCount > 1) || - opensProtoMessageField(Previous, Style)) && - Style.ColumnLimit > 0 && - getLengthToMatchingParen(Previous, State.Stack) + State.Column - 1 > - getColumnLimit(State)) - return true; - - const FormatToken &BreakConstructorInitializersToken = - Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon - ? Previous - : Current; - if (BreakConstructorInitializersToken.is(TT_CtorInitializerColon) && - (State.Column + State.Line->Last->TotalLength - Previous.TotalLength > - getColumnLimit(State) || - State.Stack.back().BreakBeforeParameter) && - (Style.AllowShortFunctionsOnASingleLine != FormatStyle::SFS_All || - Style.BreakConstructorInitializers != FormatStyle::BCIS_BeforeColon || - Style.ColumnLimit != 0)) - return true; - - if (Current.is(TT_ObjCMethodExpr) && !Previous.is(TT_SelectorName) && - State.Line->startsWith(TT_ObjCMethodSpecifier)) - return true; - if (Current.is(TT_SelectorName) && !Previous.is(tok::at) && - State.Stack.back().ObjCSelectorNameFound && - State.Stack.back().BreakBeforeParameter) - return true; - - unsigned NewLineColumn = getNewLineColumn(State); - if (Current.isMemberAccess() && Style.ColumnLimit != 0 && - State.Column + getLengthToNextOperator(Current) > Style.ColumnLimit && - (State.Column > NewLineColumn || - Current.NestingLevel < State.StartOfLineLevel)) - return true; - - if (startsSegmentOfBuilderTypeCall(Current) && - (State.Stack.back().CallContinuation != 0 || - State.Stack.back().BreakBeforeParameter) && - // JavaScript is treated different here as there is a frequent pattern: - // SomeFunction(function() { - // ... - // }.bind(...)); - // FIXME: We should find a more generic solution to this problem. - !(State.Column <= NewLineColumn && - Style.Language == FormatStyle::LK_JavaScript) && - !(Previous.closesScopeAfterBlock() && - State.Column <= NewLineColumn)) - return true; - - // If the template declaration spans multiple lines, force wrap before the - // function/class declaration - if (Previous.ClosesTemplateDeclaration && - State.Stack.back().BreakBeforeParameter && Current.CanBreakBefore) - return true; - - if (State.Column <= NewLineColumn) - return false; - - if (Style.AlwaysBreakBeforeMultilineStrings && - (NewLineColumn == State.FirstIndent + Style.ContinuationIndentWidth || - Previous.is(tok::comma) || Current.NestingLevel < 2) && - !Previous.isOneOf(tok::kw_return, tok::lessless, tok::at) && - !Previous.isOneOf(TT_InlineASMColon, TT_ConditionalExpr) && - nextIsMultilineString(State)) - return true; - - // Using CanBreakBefore here and below takes care of the decision whether the - // current style uses wrapping before or after operators for the given - // operator. - if (Previous.is(TT_BinaryOperator) && Current.CanBreakBefore) { - // If we need to break somewhere inside the LHS of a binary expression, we - // should also break after the operator. Otherwise, the formatting would - // hide the operator precedence, e.g. in: - // if (aaaaaaaaaaaaaa == - // bbbbbbbbbbbbbb && c) {.. - // For comparisons, we only apply this rule, if the LHS is a binary - // expression itself as otherwise, the line breaks seem superfluous. - // We need special cases for ">>" which we have split into two ">" while - // lexing in order to make template parsing easier. - bool IsComparison = (Previous.getPrecedence() == prec::Relational || - Previous.getPrecedence() == prec::Equality || - Previous.getPrecedence() == prec::Spaceship) && - Previous.Previous && - Previous.Previous->isNot(TT_BinaryOperator); // For >>. - bool LHSIsBinaryExpr = - Previous.Previous && Previous.Previous->EndsBinaryExpression; - if ((!IsComparison || LHSIsBinaryExpr) && !Current.isTrailingComment() && - Previous.getPrecedence() != prec::Assignment && - State.Stack.back().BreakBeforeParameter) - return true; - } else if (Current.is(TT_BinaryOperator) && Current.CanBreakBefore && - State.Stack.back().BreakBeforeParameter) { - return true; - } - - // Same as above, but for the first "<<" operator. - if (Current.is(tok::lessless) && Current.isNot(TT_OverloadedOperator) && - State.Stack.back().BreakBeforeParameter && - State.Stack.back().FirstLessLess == 0) - return true; - - if (Current.NestingLevel == 0 && !Current.isTrailingComment()) { - // Always break after "template <...>" and leading annotations. This is only - // for cases where the entire line does not fit on a single line as a - // different LineFormatter would be used otherwise. - if (Previous.ClosesTemplateDeclaration) - return Style.AlwaysBreakTemplateDeclarations != FormatStyle::BTDS_No; - if (Previous.is(TT_FunctionAnnotationRParen)) - return true; - if (Previous.is(TT_LeadingJavaAnnotation) && Current.isNot(tok::l_paren) && - Current.isNot(TT_LeadingJavaAnnotation)) - return true; - } - - // If the return type spans multiple lines, wrap before the function name. - if ((Current.is(TT_FunctionDeclarationName) || - (Current.is(tok::kw_operator) && !Previous.is(tok::coloncolon))) && - !Previous.is(tok::kw_template) && State.Stack.back().BreakBeforeParameter) - return true; - - // The following could be precomputed as they do not depend on the state. - // However, as they should take effect only if the UnwrappedLine does not fit - // into the ColumnLimit, they are checked here in the ContinuationIndenter. - if (Style.ColumnLimit != 0 && Previous.BlockKind == BK_Block && - Previous.is(tok::l_brace) && !Current.isOneOf(tok::r_brace, tok::comment)) - return true; - - if (Current.is(tok::lessless) && - ((Previous.is(tok::identifier) && Previous.TokenText == "endl") || - (Previous.Tok.isLiteral() && (Previous.TokenText.endswith("\\n\"") || - Previous.TokenText == "\'\\n\'")))) - return true; - - if (Previous.is(TT_BlockComment) && Previous.IsMultiline) - return true; - - if (State.NoContinuation) - return true; - - return false; -} - -unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline, - bool DryRun, - unsigned ExtraSpaces) { - const FormatToken &Current = *State.NextToken; - - assert(!State.Stack.empty()); - State.NoContinuation = false; - - if ((Current.is(TT_ImplicitStringLiteral) && - (Current.Previous->Tok.getIdentifierInfo() == nullptr || - Current.Previous->Tok.getIdentifierInfo()->getPPKeywordID() == - tok::pp_not_keyword))) { - unsigned EndColumn = - SourceMgr.getSpellingColumnNumber(Current.WhitespaceRange.getEnd()); - if (Current.LastNewlineOffset != 0) { - // If there is a newline within this token, the final column will solely - // determined by the current end column. - State.Column = EndColumn; - } else { - unsigned StartColumn = - SourceMgr.getSpellingColumnNumber(Current.WhitespaceRange.getBegin()); - assert(EndColumn >= StartColumn); - State.Column += EndColumn - StartColumn; - } - moveStateToNextToken(State, DryRun, /*Newline=*/false); - return 0; - } - - unsigned Penalty = 0; - if (Newline) - Penalty = addTokenOnNewLine(State, DryRun); - else - addTokenOnCurrentLine(State, DryRun, ExtraSpaces); - - return moveStateToNextToken(State, DryRun, Newline) + Penalty; -} - -void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, - unsigned ExtraSpaces) { - FormatToken &Current = *State.NextToken; - const FormatToken &Previous = *State.NextToken->Previous; - if (Current.is(tok::equal) && - (State.Line->First->is(tok::kw_for) || Current.NestingLevel == 0) && - State.Stack.back().VariablePos == 0) { - State.Stack.back().VariablePos = State.Column; - // Move over * and & if they are bound to the variable name. - const FormatToken *Tok = &Previous; - while (Tok && State.Stack.back().VariablePos >= Tok->ColumnWidth) { - State.Stack.back().VariablePos -= Tok->ColumnWidth; - if (Tok->SpacesRequiredBefore != 0) - break; - Tok = Tok->Previous; - } - if (Previous.PartOfMultiVariableDeclStmt) - State.Stack.back().LastSpace = State.Stack.back().VariablePos; - } - - unsigned Spaces = Current.SpacesRequiredBefore + ExtraSpaces; - - // Indent preprocessor directives after the hash if required. - int PPColumnCorrection = 0; - if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash && - Previous.is(tok::hash) && State.FirstIndent > 0 && - (State.Line->Type == LT_PreprocessorDirective || - State.Line->Type == LT_ImportStatement)) { - Spaces += State.FirstIndent; - - // For preprocessor indent with tabs, State.Column will be 1 because of the - // hash. This causes second-level indents onward to have an extra space - // after the tabs. We avoid this misalignment by subtracting 1 from the - // column value passed to replaceWhitespace(). - if (Style.UseTab != FormatStyle::UT_Never) - PPColumnCorrection = -1; - } - - if (!DryRun) - Whitespaces.replaceWhitespace(Current, /*Newlines=*/0, Spaces, - State.Column + Spaces + PPColumnCorrection); - - // If "BreakBeforeInheritanceComma" mode, don't break within the inheritance - // declaration unless there is multiple inheritance. - if (Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma && - Current.is(TT_InheritanceColon)) - State.Stack.back().NoLineBreak = true; - if (Style.BreakInheritanceList == FormatStyle::BILS_AfterColon && - Previous.is(TT_InheritanceColon)) - State.Stack.back().NoLineBreak = true; - - if (Current.is(TT_SelectorName) && - !State.Stack.back().ObjCSelectorNameFound) { - unsigned MinIndent = - std::max(State.FirstIndent + Style.ContinuationIndentWidth, - State.Stack.back().Indent); - unsigned FirstColonPos = State.Column + Spaces + Current.ColumnWidth; - if (Current.LongestObjCSelectorName == 0) - State.Stack.back().AlignColons = false; - else if (MinIndent + Current.LongestObjCSelectorName > FirstColonPos) - State.Stack.back().ColonPos = MinIndent + Current.LongestObjCSelectorName; - else - State.Stack.back().ColonPos = FirstColonPos; - } - - // In "AlwaysBreak" mode, enforce wrapping directly after the parenthesis by - // disallowing any further line breaks if there is no line break after the - // opening parenthesis. Don't break if it doesn't conserve columns. - if (Style.AlignAfterOpenBracket == FormatStyle::BAS_AlwaysBreak && - Previous.isOneOf(tok::l_paren, TT_TemplateOpener, tok::l_square) && - State.Column > getNewLineColumn(State) && - (!Previous.Previous || !Previous.Previous->isOneOf( - tok::kw_for, tok::kw_while, tok::kw_switch)) && - // Don't do this for simple (no expressions) one-argument function calls - // as that feels like needlessly wasting whitespace, e.g.: - // - // caaaaaaaaaaaall( - // caaaaaaaaaaaall( - // caaaaaaaaaaaall( - // caaaaaaaaaaaaaaaaaaaaaaall(aaaaaaaaaaaaaa, aaaaaaaaa)))); - Current.FakeLParens.size() > 0 && - Current.FakeLParens.back() > prec::Unknown) - State.Stack.back().NoLineBreak = true; - if (Previous.is(TT_TemplateString) && Previous.opensScope()) - State.Stack.back().NoLineBreak = true; - - if (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign && - Previous.opensScope() && Previous.isNot(TT_ObjCMethodExpr) && - (Current.isNot(TT_LineComment) || Previous.BlockKind == BK_BracedInit)) - State.Stack.back().Indent = State.Column + Spaces; - if (State.Stack.back().AvoidBinPacking && startsNextParameter(Current, Style)) - State.Stack.back().NoLineBreak = true; - if (startsSegmentOfBuilderTypeCall(Current) && - State.Column > getNewLineColumn(State)) - State.Stack.back().ContainsUnwrappedBuilder = true; - - if (Current.is(TT_LambdaArrow) && Style.Language == FormatStyle::LK_Java) - State.Stack.back().NoLineBreak = true; - if (Current.isMemberAccess() && Previous.is(tok::r_paren) && - (Previous.MatchingParen && - (Previous.TotalLength - Previous.MatchingParen->TotalLength > 10))) - // If there is a function call with long parameters, break before trailing - // calls. This prevents things like: - // EXPECT_CALL(SomeLongParameter).Times( - // 2); - // We don't want to do this for short parameters as they can just be - // indexes. - State.Stack.back().NoLineBreak = true; - - // Don't allow the RHS of an operator to be split over multiple lines unless - // there is a line-break right after the operator. - // Exclude relational operators, as there, it is always more desirable to - // have the LHS 'left' of the RHS. - const FormatToken *P = Current.getPreviousNonComment(); - if (!Current.is(tok::comment) && P && - (P->isOneOf(TT_BinaryOperator, tok::comma) || - (P->is(TT_ConditionalExpr) && P->is(tok::colon))) && - !P->isOneOf(TT_OverloadedOperator, TT_CtorInitializerComma) && - P->getPrecedence() != prec::Assignment && - P->getPrecedence() != prec::Relational && - P->getPrecedence() != prec::Spaceship) { - bool BreakBeforeOperator = - P->MustBreakBefore || P->is(tok::lessless) || - (P->is(TT_BinaryOperator) && - Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None) || - (P->is(TT_ConditionalExpr) && Style.BreakBeforeTernaryOperators); - // Don't do this if there are only two operands. In these cases, there is - // always a nice vertical separation between them and the extra line break - // does not help. - bool HasTwoOperands = - P->OperatorIndex == 0 && !P->NextOperator && !P->is(TT_ConditionalExpr); - if ((!BreakBeforeOperator && !(HasTwoOperands && Style.AlignOperands)) || - (!State.Stack.back().LastOperatorWrapped && BreakBeforeOperator)) - State.Stack.back().NoLineBreakInOperand = true; - } - - State.Column += Spaces; - if (Current.isNot(tok::comment) && Previous.is(tok::l_paren) && - Previous.Previous && - (Previous.Previous->isOneOf(tok::kw_if, tok::kw_for) || - Previous.Previous->endsSequence(tok::kw_constexpr, tok::kw_if))) { - // Treat the condition inside an if as if it was a second function - // parameter, i.e. let nested calls have a continuation indent. - State.Stack.back().LastSpace = State.Column; - State.Stack.back().NestedBlockIndent = State.Column; - } else if (!Current.isOneOf(tok::comment, tok::caret) && - ((Previous.is(tok::comma) && - !Previous.is(TT_OverloadedOperator)) || - (Previous.is(tok::colon) && Previous.is(TT_ObjCMethodExpr)))) { - State.Stack.back().LastSpace = State.Column; - } else if (Previous.is(TT_CtorInitializerColon) && - Style.BreakConstructorInitializers == - FormatStyle::BCIS_AfterColon) { - State.Stack.back().Indent = State.Column; - State.Stack.back().LastSpace = State.Column; - } else if ((Previous.isOneOf(TT_BinaryOperator, TT_ConditionalExpr, - TT_CtorInitializerColon)) && - ((Previous.getPrecedence() != prec::Assignment && - (Previous.isNot(tok::lessless) || Previous.OperatorIndex != 0 || - Previous.NextOperator)) || - Current.StartsBinaryExpression)) { - // Indent relative to the RHS of the expression unless this is a simple - // assignment without binary expression on the RHS. Also indent relative to - // unary operators and the colons of constructor initializers. - if (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None) - State.Stack.back().LastSpace = State.Column; - } else if (Previous.is(TT_InheritanceColon)) { - State.Stack.back().Indent = State.Column; - State.Stack.back().LastSpace = State.Column; - } else if (Previous.opensScope()) { - // If a function has a trailing call, indent all parameters from the - // opening parenthesis. This avoids confusing indents like: - // OuterFunction(InnerFunctionCall( // break - // ParameterToInnerFunction)) // break - // .SecondInnerFunctionCall(); - bool HasTrailingCall = false; - if (Previous.MatchingParen) { - const FormatToken *Next = Previous.MatchingParen->getNextNonComment(); - HasTrailingCall = Next && Next->isMemberAccess(); - } - if (HasTrailingCall && State.Stack.size() > 1 && - State.Stack[State.Stack.size() - 2].CallContinuation == 0) - State.Stack.back().LastSpace = State.Column; - } -} - -unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, - bool DryRun) { - FormatToken &Current = *State.NextToken; - const FormatToken &Previous = *State.NextToken->Previous; - - // Extra penalty that needs to be added because of the way certain line - // breaks are chosen. - unsigned Penalty = 0; - - const FormatToken *PreviousNonComment = Current.getPreviousNonComment(); - const FormatToken *NextNonComment = Previous.getNextNonComment(); - if (!NextNonComment) - NextNonComment = &Current; - // The first line break on any NestingLevel causes an extra penalty in order - // prefer similar line breaks. - if (!State.Stack.back().ContainsLineBreak) - Penalty += 15; - State.Stack.back().ContainsLineBreak = true; - - Penalty += State.NextToken->SplitPenalty; - - // Breaking before the first "<<" is generally not desirable if the LHS is - // short. Also always add the penalty if the LHS is split over multiple lines - // to avoid unnecessary line breaks that just work around this penalty. - if (NextNonComment->is(tok::lessless) && - State.Stack.back().FirstLessLess == 0 && - (State.Column <= Style.ColumnLimit / 3 || - State.Stack.back().BreakBeforeParameter)) - Penalty += Style.PenaltyBreakFirstLessLess; - - State.Column = getNewLineColumn(State); - - // Indent nested blocks relative to this column, unless in a very specific - // JavaScript special case where: - // - // var loooooong_name = - // function() { - // // code - // } - // - // is common and should be formatted like a free-standing function. The same - // goes for wrapping before the lambda return type arrow. - if (!Current.is(TT_LambdaArrow) && - (Style.Language != FormatStyle::LK_JavaScript || - Current.NestingLevel != 0 || !PreviousNonComment || - !PreviousNonComment->is(tok::equal) || - !Current.isOneOf(Keywords.kw_async, Keywords.kw_function))) - State.Stack.back().NestedBlockIndent = State.Column; - - if (NextNonComment->isMemberAccess()) { - if (State.Stack.back().CallContinuation == 0) - State.Stack.back().CallContinuation = State.Column; - } else if (NextNonComment->is(TT_SelectorName)) { - if (!State.Stack.back().ObjCSelectorNameFound) { - if (NextNonComment->LongestObjCSelectorName == 0) { - State.Stack.back().AlignColons = false; - } else { - State.Stack.back().ColonPos = - (shouldIndentWrappedSelectorName(Style, State.Line->Type) - ? std::max(State.Stack.back().Indent, - State.FirstIndent + Style.ContinuationIndentWidth) - : State.Stack.back().Indent) + - std::max(NextNonComment->LongestObjCSelectorName, - NextNonComment->ColumnWidth); - } - } else if (State.Stack.back().AlignColons && - State.Stack.back().ColonPos <= NextNonComment->ColumnWidth) { - State.Stack.back().ColonPos = State.Column + NextNonComment->ColumnWidth; - } - } else if (PreviousNonComment && PreviousNonComment->is(tok::colon) && - PreviousNonComment->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral)) { - // FIXME: This is hacky, find a better way. The problem is that in an ObjC - // method expression, the block should be aligned to the line starting it, - // e.g.: - // [aaaaaaaaaaaaaaa aaaaaaaaa: \\ break for some reason - // ^(int *i) { - // // ... - // }]; - // Thus, we set LastSpace of the next higher NestingLevel, to which we move - // when we consume all of the "}"'s FakeRParens at the "{". - if (State.Stack.size() > 1) - State.Stack[State.Stack.size() - 2].LastSpace = - std::max(State.Stack.back().LastSpace, State.Stack.back().Indent) + - Style.ContinuationIndentWidth; - } - - if ((PreviousNonComment && - PreviousNonComment->isOneOf(tok::comma, tok::semi) && - !State.Stack.back().AvoidBinPacking) || - Previous.is(TT_BinaryOperator)) - State.Stack.back().BreakBeforeParameter = false; - if (PreviousNonComment && - PreviousNonComment->isOneOf(TT_TemplateCloser, TT_JavaAnnotation) && - Current.NestingLevel == 0) - State.Stack.back().BreakBeforeParameter = false; - if (NextNonComment->is(tok::question) || - (PreviousNonComment && PreviousNonComment->is(tok::question))) - State.Stack.back().BreakBeforeParameter = true; - if (Current.is(TT_BinaryOperator) && Current.CanBreakBefore) - State.Stack.back().BreakBeforeParameter = false; - - if (!DryRun) { - unsigned MaxEmptyLinesToKeep = Style.MaxEmptyLinesToKeep + 1; - if (Current.is(tok::r_brace) && Current.MatchingParen && - // Only strip trailing empty lines for l_braces that have children, i.e. - // for function expressions (lambdas, arrows, etc). - !Current.MatchingParen->Children.empty()) { - // lambdas and arrow functions are expressions, thus their r_brace is not - // on its own line, and thus not covered by UnwrappedLineFormatter's logic - // about removing empty lines on closing blocks. Special case them here. - MaxEmptyLinesToKeep = 1; - } - unsigned Newlines = std::max( - 1u, std::min(Current.NewlinesBefore, MaxEmptyLinesToKeep)); - bool ContinuePPDirective = - State.Line->InPPDirective && State.Line->Type != LT_ImportStatement; - Whitespaces.replaceWhitespace(Current, Newlines, State.Column, State.Column, - ContinuePPDirective); - } - - if (!Current.isTrailingComment()) - State.Stack.back().LastSpace = State.Column; - if (Current.is(tok::lessless)) - // If we are breaking before a "<<", we always want to indent relative to - // RHS. This is necessary only for "<<", as we special-case it and don't - // always indent relative to the RHS. - State.Stack.back().LastSpace += 3; // 3 -> width of "<< ". - - State.StartOfLineLevel = Current.NestingLevel; - State.LowestLevelOnLine = Current.NestingLevel; - - // Any break on this level means that the parent level has been broken - // and we need to avoid bin packing there. - bool NestedBlockSpecialCase = - !Style.isCpp() && Current.is(tok::r_brace) && State.Stack.size() > 1 && - State.Stack[State.Stack.size() - 2].NestedBlockInlined; - if (!NestedBlockSpecialCase) - for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) - State.Stack[i].BreakBeforeParameter = true; - - if (PreviousNonComment && - !PreviousNonComment->isOneOf(tok::comma, tok::colon, tok::semi) && - (PreviousNonComment->isNot(TT_TemplateCloser) || - Current.NestingLevel != 0) && - !PreviousNonComment->isOneOf( - TT_BinaryOperator, TT_FunctionAnnotationRParen, TT_JavaAnnotation, - TT_LeadingJavaAnnotation) && - Current.isNot(TT_BinaryOperator) && !PreviousNonComment->opensScope()) - State.Stack.back().BreakBeforeParameter = true; - - // If we break after { or the [ of an array initializer, we should also break - // before the corresponding } or ]. - if (PreviousNonComment && - (PreviousNonComment->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) || - opensProtoMessageField(*PreviousNonComment, Style))) - State.Stack.back().BreakBeforeClosingBrace = true; - - if (State.Stack.back().AvoidBinPacking) { - // If we are breaking after '(', '{', '<', this is not bin packing - // unless AllowAllParametersOfDeclarationOnNextLine is false or this is a - // dict/object literal. - if (!Previous.isOneOf(tok::l_paren, tok::l_brace, TT_BinaryOperator) || - (!Style.AllowAllParametersOfDeclarationOnNextLine && - State.Line->MustBeDeclaration) || - Previous.is(TT_DictLiteral)) - State.Stack.back().BreakBeforeParameter = true; - } - - return Penalty; -} - -unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { - if (!State.NextToken || !State.NextToken->Previous) - return 0; - FormatToken &Current = *State.NextToken; - const FormatToken &Previous = *Current.Previous; - // If we are continuing an expression, we want to use the continuation indent. - unsigned ContinuationIndent = - std::max(State.Stack.back().LastSpace, State.Stack.back().Indent) + - Style.ContinuationIndentWidth; - const FormatToken *PreviousNonComment = Current.getPreviousNonComment(); - const FormatToken *NextNonComment = Previous.getNextNonComment(); - if (!NextNonComment) - NextNonComment = &Current; - - // Java specific bits. - if (Style.Language == FormatStyle::LK_Java && - Current.isOneOf(Keywords.kw_implements, Keywords.kw_extends)) - return std::max(State.Stack.back().LastSpace, - State.Stack.back().Indent + Style.ContinuationIndentWidth); - - if (NextNonComment->is(tok::l_brace) && NextNonComment->BlockKind == BK_Block) - return Current.NestingLevel == 0 ? State.FirstIndent - : State.Stack.back().Indent; - if ((Current.isOneOf(tok::r_brace, tok::r_square) || - (Current.is(tok::greater) && - (Style.Language == FormatStyle::LK_Proto || - Style.Language == FormatStyle::LK_TextProto))) && - State.Stack.size() > 1) { - if (Current.closesBlockOrBlockTypeList(Style)) - return State.Stack[State.Stack.size() - 2].NestedBlockIndent; - if (Current.MatchingParen && - Current.MatchingParen->BlockKind == BK_BracedInit) - return State.Stack[State.Stack.size() - 2].LastSpace; - return State.FirstIndent; - } - // Indent a closing parenthesis at the previous level if followed by a semi or - // opening brace. This allows indentations such as: - // foo( - // a, - // ); - // function foo( - // a, - // ) { - // code(); // - // } - if (Current.is(tok::r_paren) && State.Stack.size() > 1 && - (!Current.Next || Current.Next->isOneOf(tok::semi, tok::l_brace))) - return State.Stack[State.Stack.size() - 2].LastSpace; - if (NextNonComment->is(TT_TemplateString) && NextNonComment->closesScope()) - return State.Stack[State.Stack.size() - 2].LastSpace; - if (Current.is(tok::identifier) && Current.Next && - (Current.Next->is(TT_DictLiteral) || - ((Style.Language == FormatStyle::LK_Proto || - Style.Language == FormatStyle::LK_TextProto) && - Current.Next->isOneOf(tok::less, tok::l_brace)))) - return State.Stack.back().Indent; - if (NextNonComment->is(TT_ObjCStringLiteral) && - State.StartOfStringLiteral != 0) - return State.StartOfStringLiteral - 1; - if (NextNonComment->isStringLiteral() && State.StartOfStringLiteral != 0) - return State.StartOfStringLiteral; - if (NextNonComment->is(tok::lessless) && - State.Stack.back().FirstLessLess != 0) - return State.Stack.back().FirstLessLess; - if (NextNonComment->isMemberAccess()) { - if (State.Stack.back().CallContinuation == 0) - return ContinuationIndent; - return State.Stack.back().CallContinuation; - } - if (State.Stack.back().QuestionColumn != 0 && - ((NextNonComment->is(tok::colon) && - NextNonComment->is(TT_ConditionalExpr)) || - Previous.is(TT_ConditionalExpr))) - return State.Stack.back().QuestionColumn; - if (Previous.is(tok::comma) && State.Stack.back().VariablePos != 0) - return State.Stack.back().VariablePos; - if ((PreviousNonComment && - (PreviousNonComment->ClosesTemplateDeclaration || - PreviousNonComment->isOneOf( - TT_AttributeParen, TT_AttributeSquare, TT_FunctionAnnotationRParen, - TT_JavaAnnotation, TT_LeadingJavaAnnotation))) || - (!Style.IndentWrappedFunctionNames && - NextNonComment->isOneOf(tok::kw_operator, TT_FunctionDeclarationName))) - return std::max(State.Stack.back().LastSpace, State.Stack.back().Indent); - if (NextNonComment->is(TT_SelectorName)) { - if (!State.Stack.back().ObjCSelectorNameFound) { - unsigned MinIndent = State.Stack.back().Indent; - if (shouldIndentWrappedSelectorName(Style, State.Line->Type)) - MinIndent = std::max(MinIndent, - State.FirstIndent + Style.ContinuationIndentWidth); - // If LongestObjCSelectorName is 0, we are indenting the first - // part of an ObjC selector (or a selector component which is - // not colon-aligned due to block formatting). - // - // Otherwise, we are indenting a subsequent part of an ObjC - // selector which should be colon-aligned to the longest - // component of the ObjC selector. - // - // In either case, we want to respect Style.IndentWrappedFunctionNames. - return MinIndent + - std::max(NextNonComment->LongestObjCSelectorName, - NextNonComment->ColumnWidth) - - NextNonComment->ColumnWidth; - } - if (!State.Stack.back().AlignColons) - return State.Stack.back().Indent; - if (State.Stack.back().ColonPos > NextNonComment->ColumnWidth) - return State.Stack.back().ColonPos - NextNonComment->ColumnWidth; - return State.Stack.back().Indent; - } - if (NextNonComment->is(tok::colon) && NextNonComment->is(TT_ObjCMethodExpr)) - return State.Stack.back().ColonPos; - if (NextNonComment->is(TT_ArraySubscriptLSquare)) { - if (State.Stack.back().StartOfArraySubscripts != 0) - return State.Stack.back().StartOfArraySubscripts; - return ContinuationIndent; - } - - // This ensure that we correctly format ObjC methods calls without inputs, - // i.e. where the last element isn't selector like: [callee method]; - if (NextNonComment->is(tok::identifier) && NextNonComment->FakeRParens == 0 && - NextNonComment->Next && NextNonComment->Next->is(TT_ObjCMethodExpr)) - return State.Stack.back().Indent; - - if (NextNonComment->isOneOf(TT_StartOfName, TT_PointerOrReference) || - Previous.isOneOf(tok::coloncolon, tok::equal, TT_JsTypeColon)) - return ContinuationIndent; - if (PreviousNonComment && PreviousNonComment->is(tok::colon) && - PreviousNonComment->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral)) - return ContinuationIndent; - if (NextNonComment->is(TT_CtorInitializerComma)) - return State.Stack.back().Indent; - if (PreviousNonComment && PreviousNonComment->is(TT_CtorInitializerColon) && - Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon) - return State.Stack.back().Indent; - if (PreviousNonComment && PreviousNonComment->is(TT_InheritanceColon) && - Style.BreakInheritanceList == FormatStyle::BILS_AfterColon) - return State.Stack.back().Indent; - if (NextNonComment->isOneOf(TT_CtorInitializerColon, TT_InheritanceColon, - TT_InheritanceComma)) - return State.FirstIndent + Style.ConstructorInitializerIndentWidth; - if (Previous.is(tok::r_paren) && !Current.isBinaryOperator() && - !Current.isOneOf(tok::colon, tok::comment)) - return ContinuationIndent; - if (Current.is(TT_ProtoExtensionLSquare)) - return State.Stack.back().Indent; - if (State.Stack.back().Indent == State.FirstIndent && PreviousNonComment && - PreviousNonComment->isNot(tok::r_brace)) - // Ensure that we fall back to the continuation indent width instead of - // just flushing continuations left. - return State.Stack.back().Indent + Style.ContinuationIndentWidth; - return State.Stack.back().Indent; -} - -unsigned ContinuationIndenter::moveStateToNextToken(LineState &State, - bool DryRun, bool Newline) { - assert(State.Stack.size()); - const FormatToken &Current = *State.NextToken; - - if (Current.isOneOf(tok::comma, TT_BinaryOperator)) - State.Stack.back().NoLineBreakInOperand = false; - if (Current.is(TT_InheritanceColon)) - State.Stack.back().AvoidBinPacking = true; - if (Current.is(tok::lessless) && Current.isNot(TT_OverloadedOperator)) { - if (State.Stack.back().FirstLessLess == 0) - State.Stack.back().FirstLessLess = State.Column; - else - State.Stack.back().LastOperatorWrapped = Newline; - } - if (Current.is(TT_BinaryOperator) && Current.isNot(tok::lessless)) - State.Stack.back().LastOperatorWrapped = Newline; - if (Current.is(TT_ConditionalExpr) && Current.Previous && - !Current.Previous->is(TT_ConditionalExpr)) - State.Stack.back().LastOperatorWrapped = Newline; - if (Current.is(TT_ArraySubscriptLSquare) && - State.Stack.back().StartOfArraySubscripts == 0) - State.Stack.back().StartOfArraySubscripts = State.Column; - if (Style.BreakBeforeTernaryOperators && Current.is(tok::question)) - State.Stack.back().QuestionColumn = State.Column; - if (!Style.BreakBeforeTernaryOperators && Current.isNot(tok::colon)) { - const FormatToken *Previous = Current.Previous; - while (Previous && Previous->isTrailingComment()) - Previous = Previous->Previous; - if (Previous && Previous->is(tok::question)) - State.Stack.back().QuestionColumn = State.Column; - } - if (!Current.opensScope() && !Current.closesScope() && - !Current.is(TT_PointerOrReference)) - State.LowestLevelOnLine = - std::min(State.LowestLevelOnLine, Current.NestingLevel); - if (Current.isMemberAccess()) - State.Stack.back().StartOfFunctionCall = - !Current.NextOperator ? 0 : State.Column; - if (Current.is(TT_SelectorName)) - State.Stack.back().ObjCSelectorNameFound = true; - if (Current.is(TT_CtorInitializerColon) && - Style.BreakConstructorInitializers != FormatStyle::BCIS_AfterColon) { - // Indent 2 from the column, so: - // SomeClass::SomeClass() - // : First(...), ... - // Next(...) - // ^ line up here. - State.Stack.back().Indent = - State.Column + - (Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma - ? 0 - : 2); - State.Stack.back().NestedBlockIndent = State.Stack.back().Indent; - if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine) - State.Stack.back().AvoidBinPacking = true; - State.Stack.back().BreakBeforeParameter = false; - } - if (Current.is(TT_CtorInitializerColon) && - Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon) { - State.Stack.back().Indent = - State.FirstIndent + Style.ConstructorInitializerIndentWidth; - State.Stack.back().NestedBlockIndent = State.Stack.back().Indent; - if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine) - State.Stack.back().AvoidBinPacking = true; - } - if (Current.is(TT_InheritanceColon)) - State.Stack.back().Indent = - State.FirstIndent + Style.ConstructorInitializerIndentWidth; - if (Current.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) && Newline) - State.Stack.back().NestedBlockIndent = - State.Column + Current.ColumnWidth + 1; - if (Current.isOneOf(TT_LambdaLSquare, TT_LambdaArrow)) - State.Stack.back().LastSpace = State.Column; - - // Insert scopes created by fake parenthesis. - const FormatToken *Previous = Current.getPreviousNonComment(); - - // Add special behavior to support a format commonly used for JavaScript - // closures: - // SomeFunction(function() { - // foo(); - // bar(); - // }, a, b, c); - if (Current.isNot(tok::comment) && Previous && - Previous->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) && - !Previous->is(TT_DictLiteral) && State.Stack.size() > 1 && - !State.Stack.back().HasMultipleNestedBlocks) { - if (State.Stack[State.Stack.size() - 2].NestedBlockInlined && Newline) - for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) - State.Stack[i].NoLineBreak = true; - State.Stack[State.Stack.size() - 2].NestedBlockInlined = false; - } - if (Previous && - (Previous->isOneOf(tok::l_paren, tok::comma, tok::colon) || - Previous->isOneOf(TT_BinaryOperator, TT_ConditionalExpr)) && - !Previous->isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) { - State.Stack.back().NestedBlockInlined = - !Newline && - (Previous->isNot(tok::l_paren) || Previous->ParameterCount > 1); - } - - moveStatePastFakeLParens(State, Newline); - moveStatePastScopeCloser(State); - bool AllowBreak = !State.Stack.back().NoLineBreak && - !State.Stack.back().NoLineBreakInOperand; - moveStatePastScopeOpener(State, Newline); - moveStatePastFakeRParens(State); - - if (Current.is(TT_ObjCStringLiteral) && State.StartOfStringLiteral == 0) - State.StartOfStringLiteral = State.Column + 1; - else if (Current.isStringLiteral() && State.StartOfStringLiteral == 0) - State.StartOfStringLiteral = State.Column; - else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash) && - !Current.isStringLiteral()) - State.StartOfStringLiteral = 0; - - State.Column += Current.ColumnWidth; - State.NextToken = State.NextToken->Next; - - unsigned Penalty = - handleEndOfLine(Current, State, DryRun, AllowBreak); - - if (Current.Role) - Current.Role->formatFromToken(State, this, DryRun); - // If the previous has a special role, let it consume tokens as appropriate. - // It is necessary to start at the previous token for the only implemented - // role (comma separated list). That way, the decision whether or not to break - // after the "{" is already done and both options are tried and evaluated. - // FIXME: This is ugly, find a better way. - if (Previous && Previous->Role) - Penalty += Previous->Role->formatAfterToken(State, this, DryRun); - - return Penalty; -} - -void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, - bool Newline) { - const FormatToken &Current = *State.NextToken; - const FormatToken *Previous = Current.getPreviousNonComment(); - - // Don't add extra indentation for the first fake parenthesis after - // 'return', assignments or opening <({[. The indentation for these cases - // is special cased. - bool SkipFirstExtraIndent = - (Previous && (Previous->opensScope() || - Previous->isOneOf(tok::semi, tok::kw_return) || - (Previous->getPrecedence() == prec::Assignment && - Style.AlignOperands) || - Previous->is(TT_ObjCMethodExpr))); - for (SmallVectorImpl<prec::Level>::const_reverse_iterator - I = Current.FakeLParens.rbegin(), - E = Current.FakeLParens.rend(); - I != E; ++I) { - ParenState NewParenState = State.Stack.back(); - NewParenState.Tok = nullptr; - NewParenState.ContainsLineBreak = false; - NewParenState.LastOperatorWrapped = true; - NewParenState.NoLineBreak = - NewParenState.NoLineBreak || State.Stack.back().NoLineBreakInOperand; - - // Don't propagate AvoidBinPacking into subexpressions of arg/param lists. - if (*I > prec::Comma) - NewParenState.AvoidBinPacking = false; - - // Indent from 'LastSpace' unless these are fake parentheses encapsulating - // a builder type call after 'return' or, if the alignment after opening - // brackets is disabled. - if (!Current.isTrailingComment() && - (Style.AlignOperands || *I < prec::Assignment) && - (!Previous || Previous->isNot(tok::kw_return) || - (Style.Language != FormatStyle::LK_Java && *I > 0)) && - (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign || - *I != prec::Comma || Current.NestingLevel == 0)) - NewParenState.Indent = - std::max(std::max(State.Column, NewParenState.Indent), - State.Stack.back().LastSpace); - - // Do not indent relative to the fake parentheses inserted for "." or "->". - // This is a special case to make the following to statements consistent: - // OuterFunction(InnerFunctionCall( // break - // ParameterToInnerFunction)); - // OuterFunction(SomeObject.InnerFunctionCall( // break - // ParameterToInnerFunction)); - if (*I > prec::Unknown) - NewParenState.LastSpace = std::max(NewParenState.LastSpace, State.Column); - if (*I != prec::Conditional && !Current.is(TT_UnaryOperator) && - Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) - NewParenState.StartOfFunctionCall = State.Column; - - // Always indent conditional expressions. Never indent expression where - // the 'operator' is ',', ';' or an assignment (i.e. *I <= - // prec::Assignment) as those have different indentation rules. Indent - // other expression, unless the indentation needs to be skipped. - if (*I == prec::Conditional || - (!SkipFirstExtraIndent && *I > prec::Assignment && - !Current.isTrailingComment())) - NewParenState.Indent += Style.ContinuationIndentWidth; - if ((Previous && !Previous->opensScope()) || *I != prec::Comma) - NewParenState.BreakBeforeParameter = false; - State.Stack.push_back(NewParenState); - SkipFirstExtraIndent = false; - } -} - -void ContinuationIndenter::moveStatePastFakeRParens(LineState &State) { - for (unsigned i = 0, e = State.NextToken->FakeRParens; i != e; ++i) { - unsigned VariablePos = State.Stack.back().VariablePos; - if (State.Stack.size() == 1) { - // Do not pop the last element. - break; - } - State.Stack.pop_back(); - State.Stack.back().VariablePos = VariablePos; - } -} - -void ContinuationIndenter::moveStatePastScopeOpener(LineState &State, - bool Newline) { - const FormatToken &Current = *State.NextToken; - if (!Current.opensScope()) - return; - - if (Current.MatchingParen && Current.BlockKind == BK_Block) { - moveStateToNewBlock(State); - return; - } - - unsigned NewIndent; - unsigned LastSpace = State.Stack.back().LastSpace; - bool AvoidBinPacking; - bool BreakBeforeParameter = false; - unsigned NestedBlockIndent = std::max(State.Stack.back().StartOfFunctionCall, - State.Stack.back().NestedBlockIndent); - if (Current.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) || - opensProtoMessageField(Current, Style)) { - if (Current.opensBlockOrBlockTypeList(Style)) { - NewIndent = Style.IndentWidth + - std::min(State.Column, State.Stack.back().NestedBlockIndent); - } else { - NewIndent = State.Stack.back().LastSpace + Style.ContinuationIndentWidth; - } - const FormatToken *NextNoComment = Current.getNextNonComment(); - bool EndsInComma = Current.MatchingParen && - Current.MatchingParen->Previous && - Current.MatchingParen->Previous->is(tok::comma); - AvoidBinPacking = EndsInComma || Current.is(TT_DictLiteral) || - Style.Language == FormatStyle::LK_Proto || - Style.Language == FormatStyle::LK_TextProto || - !Style.BinPackArguments || - (NextNoComment && - NextNoComment->isOneOf(TT_DesignatedInitializerPeriod, - TT_DesignatedInitializerLSquare)); - BreakBeforeParameter = EndsInComma; - if (Current.ParameterCount > 1) - NestedBlockIndent = std::max(NestedBlockIndent, State.Column + 1); - } else { - NewIndent = Style.ContinuationIndentWidth + - std::max(State.Stack.back().LastSpace, - State.Stack.back().StartOfFunctionCall); - - // Ensure that different different brackets force relative alignment, e.g.: - // void SomeFunction(vector< // break - // int> v); - // FIXME: We likely want to do this for more combinations of brackets. - if (Current.is(tok::less) && Current.ParentBracket == tok::l_paren) { - NewIndent = std::max(NewIndent, State.Stack.back().Indent); - LastSpace = std::max(LastSpace, State.Stack.back().Indent); - } - - bool EndsInComma = - Current.MatchingParen && - Current.MatchingParen->getPreviousNonComment() && - Current.MatchingParen->getPreviousNonComment()->is(tok::comma); - - // If ObjCBinPackProtocolList is unspecified, fall back to BinPackParameters - // for backwards compatibility. - bool ObjCBinPackProtocolList = - (Style.ObjCBinPackProtocolList == FormatStyle::BPS_Auto && - Style.BinPackParameters) || - Style.ObjCBinPackProtocolList == FormatStyle::BPS_Always; - - bool BinPackDeclaration = - (State.Line->Type != LT_ObjCDecl && Style.BinPackParameters) || - (State.Line->Type == LT_ObjCDecl && ObjCBinPackProtocolList); - - AvoidBinPacking = - (Style.Language == FormatStyle::LK_JavaScript && EndsInComma) || - (State.Line->MustBeDeclaration && !BinPackDeclaration) || - (!State.Line->MustBeDeclaration && !Style.BinPackArguments) || - (Style.ExperimentalAutoDetectBinPacking && - (Current.PackingKind == PPK_OnePerLine || - (!BinPackInconclusiveFunctions && - Current.PackingKind == PPK_Inconclusive))); - - if (Current.is(TT_ObjCMethodExpr) && Current.MatchingParen) { - if (Style.ColumnLimit) { - // If this '[' opens an ObjC call, determine whether all parameters fit - // into one line and put one per line if they don't. - if (getLengthToMatchingParen(Current, State.Stack) + State.Column > - getColumnLimit(State)) - BreakBeforeParameter = true; - } else { - // For ColumnLimit = 0, we have to figure out whether there is or has to - // be a line break within this call. - for (const FormatToken *Tok = &Current; - Tok && Tok != Current.MatchingParen; Tok = Tok->Next) { - if (Tok->MustBreakBefore || - (Tok->CanBreakBefore && Tok->NewlinesBefore > 0)) { - BreakBeforeParameter = true; - break; - } - } - } - } - - if (Style.Language == FormatStyle::LK_JavaScript && EndsInComma) - BreakBeforeParameter = true; - } - // Generally inherit NoLineBreak from the current scope to nested scope. - // However, don't do this for non-empty nested blocks, dict literals and - // array literals as these follow different indentation rules. - bool NoLineBreak = - Current.Children.empty() && - !Current.isOneOf(TT_DictLiteral, TT_ArrayInitializerLSquare) && - (State.Stack.back().NoLineBreak || - State.Stack.back().NoLineBreakInOperand || - (Current.is(TT_TemplateOpener) && - State.Stack.back().ContainsUnwrappedBuilder)); - State.Stack.push_back( - ParenState(&Current, NewIndent, LastSpace, AvoidBinPacking, NoLineBreak)); - State.Stack.back().NestedBlockIndent = NestedBlockIndent; - State.Stack.back().BreakBeforeParameter = BreakBeforeParameter; - State.Stack.back().HasMultipleNestedBlocks = Current.BlockParameterCount > 1; - State.Stack.back().IsInsideObjCArrayLiteral = - Current.is(TT_ArrayInitializerLSquare) && Current.Previous && - Current.Previous->is(tok::at); -} - -void ContinuationIndenter::moveStatePastScopeCloser(LineState &State) { - const FormatToken &Current = *State.NextToken; - if (!Current.closesScope()) - return; - - // If we encounter a closing ), ], } or >, we can remove a level from our - // stacks. - if (State.Stack.size() > 1 && - (Current.isOneOf(tok::r_paren, tok::r_square, TT_TemplateString) || - (Current.is(tok::r_brace) && State.NextToken != State.Line->First) || - State.NextToken->is(TT_TemplateCloser) || - (Current.is(tok::greater) && Current.is(TT_DictLiteral)))) - State.Stack.pop_back(); - - // Reevaluate whether ObjC message arguments fit into one line. - // If a receiver spans multiple lines, e.g.: - // [[object block:^{ - // return 42; - // }] a:42 b:42]; - // BreakBeforeParameter is calculated based on an incorrect assumption - // (it is checked whether the whole expression fits into one line without - // considering a line break inside a message receiver). - // We check whether arguements fit after receiver scope closer (into the same - // line). - if (State.Stack.back().BreakBeforeParameter && Current.MatchingParen && - Current.MatchingParen->Previous) { - const FormatToken &CurrentScopeOpener = *Current.MatchingParen->Previous; - if (CurrentScopeOpener.is(TT_ObjCMethodExpr) && - CurrentScopeOpener.MatchingParen) { - int NecessarySpaceInLine = - getLengthToMatchingParen(CurrentScopeOpener, State.Stack) + - CurrentScopeOpener.TotalLength - Current.TotalLength - 1; - if (State.Column + Current.ColumnWidth + NecessarySpaceInLine <= - Style.ColumnLimit) - State.Stack.back().BreakBeforeParameter = false; - } - } - - if (Current.is(tok::r_square)) { - // If this ends the array subscript expr, reset the corresponding value. - const FormatToken *NextNonComment = Current.getNextNonComment(); - if (NextNonComment && NextNonComment->isNot(tok::l_square)) - State.Stack.back().StartOfArraySubscripts = 0; - } -} - -void ContinuationIndenter::moveStateToNewBlock(LineState &State) { - unsigned NestedBlockIndent = State.Stack.back().NestedBlockIndent; - // ObjC block sometimes follow special indentation rules. - unsigned NewIndent = - NestedBlockIndent + (State.NextToken->is(TT_ObjCBlockLBrace) - ? Style.ObjCBlockIndentWidth - : Style.IndentWidth); - State.Stack.push_back(ParenState(State.NextToken, NewIndent, - State.Stack.back().LastSpace, - /*AvoidBinPacking=*/true, - /*NoLineBreak=*/false)); - State.Stack.back().NestedBlockIndent = NestedBlockIndent; - State.Stack.back().BreakBeforeParameter = true; -} - -static unsigned getLastLineEndColumn(StringRef Text, unsigned StartColumn, - unsigned TabWidth, - encoding::Encoding Encoding) { - size_t LastNewlinePos = Text.find_last_of("\n"); - if (LastNewlinePos == StringRef::npos) { - return StartColumn + - encoding::columnWidthWithTabs(Text, StartColumn, TabWidth, Encoding); - } else { - return encoding::columnWidthWithTabs(Text.substr(LastNewlinePos), - /*StartColumn=*/0, TabWidth, Encoding); - } -} - -unsigned ContinuationIndenter::reformatRawStringLiteral( - const FormatToken &Current, LineState &State, - const FormatStyle &RawStringStyle, bool DryRun) { - unsigned StartColumn = State.Column - Current.ColumnWidth; - StringRef OldDelimiter = *getRawStringDelimiter(Current.TokenText); - StringRef NewDelimiter = - getCanonicalRawStringDelimiter(Style, RawStringStyle.Language); - if (NewDelimiter.empty() || OldDelimiter.empty()) - NewDelimiter = OldDelimiter; - // The text of a raw string is between the leading 'R"delimiter(' and the - // trailing 'delimiter)"'. - unsigned OldPrefixSize = 3 + OldDelimiter.size(); - unsigned OldSuffixSize = 2 + OldDelimiter.size(); - // We create a virtual text environment which expects a null-terminated - // string, so we cannot use StringRef. - std::string RawText = - Current.TokenText.substr(OldPrefixSize).drop_back(OldSuffixSize); - if (NewDelimiter != OldDelimiter) { - // Don't update to the canonical delimiter 'deli' if ')deli"' occurs in the - // raw string. - std::string CanonicalDelimiterSuffix = (")" + NewDelimiter + "\"").str(); - if (StringRef(RawText).contains(CanonicalDelimiterSuffix)) - NewDelimiter = OldDelimiter; - } - - unsigned NewPrefixSize = 3 + NewDelimiter.size(); - unsigned NewSuffixSize = 2 + NewDelimiter.size(); - - // The first start column is the column the raw text starts after formatting. - unsigned FirstStartColumn = StartColumn + NewPrefixSize; - - // The next start column is the intended indentation a line break inside - // the raw string at level 0. It is determined by the following rules: - // - if the content starts on newline, it is one level more than the current - // indent, and - // - if the content does not start on a newline, it is the first start - // column. - // These rules have the advantage that the formatted content both does not - // violate the rectangle rule and visually flows within the surrounding - // source. - bool ContentStartsOnNewline = Current.TokenText[OldPrefixSize] == '\n'; - // If this token is the last parameter (checked by looking if it's followed by - // `)`, the base the indent off the line's nested block indent. Otherwise, - // base the indent off the arguments indent, so we can achieve: - // fffffffffff(1, 2, 3, R"pb( - // key1: 1 # - // key2: 2)pb"); - // - // fffffffffff(1, 2, 3, - // R"pb( - // key1: 1 # - // key2: 2 - // )pb", - // 5); - unsigned CurrentIndent = (Current.Next && Current.Next->is(tok::r_paren)) - ? State.Stack.back().NestedBlockIndent - : State.Stack.back().Indent; - unsigned NextStartColumn = ContentStartsOnNewline - ? CurrentIndent + Style.IndentWidth - : FirstStartColumn; - - // The last start column is the column the raw string suffix starts if it is - // put on a newline. - // The last start column is the intended indentation of the raw string postfix - // if it is put on a newline. It is determined by the following rules: - // - if the raw string prefix starts on a newline, it is the column where - // that raw string prefix starts, and - // - if the raw string prefix does not start on a newline, it is the current - // indent. - unsigned LastStartColumn = Current.NewlinesBefore - ? FirstStartColumn - NewPrefixSize - : CurrentIndent; - - std::pair<tooling::Replacements, unsigned> Fixes = internal::reformat( - RawStringStyle, RawText, {tooling::Range(0, RawText.size())}, - FirstStartColumn, NextStartColumn, LastStartColumn, "<stdin>", - /*Status=*/nullptr); - - auto NewCode = applyAllReplacements(RawText, Fixes.first); - tooling::Replacements NoFixes; - if (!NewCode) { - return addMultilineToken(Current, State); - } - if (!DryRun) { - if (NewDelimiter != OldDelimiter) { - // In 'R"delimiter(...', the delimiter starts 2 characters after the start - // of the token. - SourceLocation PrefixDelimiterStart = - Current.Tok.getLocation().getLocWithOffset(2); - auto PrefixErr = Whitespaces.addReplacement(tooling::Replacement( - SourceMgr, PrefixDelimiterStart, OldDelimiter.size(), NewDelimiter)); - if (PrefixErr) { - llvm::errs() - << "Failed to update the prefix delimiter of a raw string: " - << llvm::toString(std::move(PrefixErr)) << "\n"; - } - // In 'R"delimiter(...)delimiter"', the suffix delimiter starts at - // position length - 1 - |delimiter|. - SourceLocation SuffixDelimiterStart = - Current.Tok.getLocation().getLocWithOffset(Current.TokenText.size() - - 1 - OldDelimiter.size()); - auto SuffixErr = Whitespaces.addReplacement(tooling::Replacement( - SourceMgr, SuffixDelimiterStart, OldDelimiter.size(), NewDelimiter)); - if (SuffixErr) { - llvm::errs() - << "Failed to update the suffix delimiter of a raw string: " - << llvm::toString(std::move(SuffixErr)) << "\n"; - } - } - SourceLocation OriginLoc = - Current.Tok.getLocation().getLocWithOffset(OldPrefixSize); - for (const tooling::Replacement &Fix : Fixes.first) { - auto Err = Whitespaces.addReplacement(tooling::Replacement( - SourceMgr, OriginLoc.getLocWithOffset(Fix.getOffset()), - Fix.getLength(), Fix.getReplacementText())); - if (Err) { - llvm::errs() << "Failed to reformat raw string: " - << llvm::toString(std::move(Err)) << "\n"; - } - } - } - unsigned RawLastLineEndColumn = getLastLineEndColumn( - *NewCode, FirstStartColumn, Style.TabWidth, Encoding); - State.Column = RawLastLineEndColumn + NewSuffixSize; - // Since we're updating the column to after the raw string literal here, we - // have to manually add the penalty for the prefix R"delim( over the column - // limit. - unsigned PrefixExcessCharacters = - StartColumn + NewPrefixSize > Style.ColumnLimit ? - StartColumn + NewPrefixSize - Style.ColumnLimit : 0; - bool IsMultiline = - ContentStartsOnNewline || (NewCode->find('\n') != std::string::npos); - if (IsMultiline) { - // Break before further function parameters on all levels. - for (unsigned i = 0, e = State.Stack.size(); i != e; ++i) - State.Stack[i].BreakBeforeParameter = true; - } - return Fixes.second + PrefixExcessCharacters * Style.PenaltyExcessCharacter; -} - -unsigned ContinuationIndenter::addMultilineToken(const FormatToken &Current, - LineState &State) { - // Break before further function parameters on all levels. - for (unsigned i = 0, e = State.Stack.size(); i != e; ++i) - State.Stack[i].BreakBeforeParameter = true; - - unsigned ColumnsUsed = State.Column; - // We can only affect layout of the first and the last line, so the penalty - // for all other lines is constant, and we ignore it. - State.Column = Current.LastLineColumnWidth; - - if (ColumnsUsed > getColumnLimit(State)) - return Style.PenaltyExcessCharacter * (ColumnsUsed - getColumnLimit(State)); - return 0; -} - -unsigned ContinuationIndenter::handleEndOfLine(const FormatToken &Current, - LineState &State, bool DryRun, - bool AllowBreak) { - unsigned Penalty = 0; - // Compute the raw string style to use in case this is a raw string literal - // that can be reformatted. - auto RawStringStyle = getRawStringStyle(Current, State); - if (RawStringStyle && !Current.Finalized) { - Penalty = reformatRawStringLiteral(Current, State, *RawStringStyle, DryRun); - } else if (Current.IsMultiline && Current.isNot(TT_BlockComment)) { - // Don't break multi-line tokens other than block comments and raw string - // literals. Instead, just update the state. - Penalty = addMultilineToken(Current, State); - } else if (State.Line->Type != LT_ImportStatement) { - // We generally don't break import statements. - LineState OriginalState = State; - - // Whether we force the reflowing algorithm to stay strictly within the - // column limit. - bool Strict = false; - // Whether the first non-strict attempt at reflowing did intentionally - // exceed the column limit. - bool Exceeded = false; - std::tie(Penalty, Exceeded) = breakProtrudingToken( - Current, State, AllowBreak, /*DryRun=*/true, Strict); - if (Exceeded) { - // If non-strict reflowing exceeds the column limit, try whether strict - // reflowing leads to an overall lower penalty. - LineState StrictState = OriginalState; - unsigned StrictPenalty = - breakProtrudingToken(Current, StrictState, AllowBreak, - /*DryRun=*/true, /*Strict=*/true) - .first; - Strict = StrictPenalty <= Penalty; - if (Strict) { - Penalty = StrictPenalty; - State = StrictState; - } - } - if (!DryRun) { - // If we're not in dry-run mode, apply the changes with the decision on - // strictness made above. - breakProtrudingToken(Current, OriginalState, AllowBreak, /*DryRun=*/false, - Strict); - } - } - if (State.Column > getColumnLimit(State)) { - unsigned ExcessCharacters = State.Column - getColumnLimit(State); - Penalty += Style.PenaltyExcessCharacter * ExcessCharacters; - } - return Penalty; -} - -// Returns the enclosing function name of a token, or the empty string if not -// found. -static StringRef getEnclosingFunctionName(const FormatToken &Current) { - // Look for: 'function(' or 'function<templates>(' before Current. - auto Tok = Current.getPreviousNonComment(); - if (!Tok || !Tok->is(tok::l_paren)) - return ""; - Tok = Tok->getPreviousNonComment(); - if (!Tok) - return ""; - if (Tok->is(TT_TemplateCloser)) { - Tok = Tok->MatchingParen; - if (Tok) - Tok = Tok->getPreviousNonComment(); - } - if (!Tok || !Tok->is(tok::identifier)) - return ""; - return Tok->TokenText; -} - -llvm::Optional<FormatStyle> -ContinuationIndenter::getRawStringStyle(const FormatToken &Current, - const LineState &State) { - if (!Current.isStringLiteral()) - return None; - auto Delimiter = getRawStringDelimiter(Current.TokenText); - if (!Delimiter) - return None; - auto RawStringStyle = RawStringFormats.getDelimiterStyle(*Delimiter); - if (!RawStringStyle && Delimiter->empty()) - RawStringStyle = RawStringFormats.getEnclosingFunctionStyle( - getEnclosingFunctionName(Current)); - if (!RawStringStyle) - return None; - RawStringStyle->ColumnLimit = getColumnLimit(State); - return RawStringStyle; -} - -std::unique_ptr<BreakableToken> ContinuationIndenter::createBreakableToken( - const FormatToken &Current, LineState &State, bool AllowBreak) { - unsigned StartColumn = State.Column - Current.ColumnWidth; - if (Current.isStringLiteral()) { - // FIXME: String literal breaking is currently disabled for Java and JS, as - // it requires strings to be merged using "+" which we don't support. - if (Style.Language == FormatStyle::LK_Java || - Style.Language == FormatStyle::LK_JavaScript || - !Style.BreakStringLiterals || - !AllowBreak) - return nullptr; - - // Don't break string literals inside preprocessor directives (except for - // #define directives, as their contents are stored in separate lines and - // are not affected by this check). - // This way we avoid breaking code with line directives and unknown - // preprocessor directives that contain long string literals. - if (State.Line->Type == LT_PreprocessorDirective) - return nullptr; - // Exempts unterminated string literals from line breaking. The user will - // likely want to terminate the string before any line breaking is done. - if (Current.IsUnterminatedLiteral) - return nullptr; - // Don't break string literals inside Objective-C array literals (doing so - // raises the warning -Wobjc-string-concatenation). - if (State.Stack.back().IsInsideObjCArrayLiteral) { - return nullptr; - } - - StringRef Text = Current.TokenText; - StringRef Prefix; - StringRef Postfix; - // FIXME: Handle whitespace between '_T', '(', '"..."', and ')'. - // FIXME: Store Prefix and Suffix (or PrefixLength and SuffixLength to - // reduce the overhead) for each FormatToken, which is a string, so that we - // don't run multiple checks here on the hot path. - if ((Text.endswith(Postfix = "\"") && - (Text.startswith(Prefix = "@\"") || Text.startswith(Prefix = "\"") || - Text.startswith(Prefix = "u\"") || Text.startswith(Prefix = "U\"") || - Text.startswith(Prefix = "u8\"") || - Text.startswith(Prefix = "L\""))) || - (Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")"))) { - // We need this to address the case where there is an unbreakable tail - // only if certain other formatting decisions have been taken. The - // UnbreakableTailLength of Current is an overapproximation is that case - // and we need to be correct here. - unsigned UnbreakableTailLength = (State.NextToken && canBreak(State)) - ? 0 - : Current.UnbreakableTailLength; - return llvm::make_unique<BreakableStringLiteral>( - Current, StartColumn, Prefix, Postfix, UnbreakableTailLength, - State.Line->InPPDirective, Encoding, Style); - } - } else if (Current.is(TT_BlockComment)) { - if (!Style.ReflowComments || - // If a comment token switches formatting, like - // /* clang-format on */, we don't want to break it further, - // but we may still want to adjust its indentation. - switchesFormatting(Current)) { - return nullptr; - } - return llvm::make_unique<BreakableBlockComment>( - Current, StartColumn, Current.OriginalColumn, !Current.Previous, - State.Line->InPPDirective, Encoding, Style); - } else if (Current.is(TT_LineComment) && - (Current.Previous == nullptr || - Current.Previous->isNot(TT_ImplicitStringLiteral))) { - if (!Style.ReflowComments || - CommentPragmasRegex.match(Current.TokenText.substr(2)) || - switchesFormatting(Current)) - return nullptr; - return llvm::make_unique<BreakableLineCommentSection>( - Current, StartColumn, Current.OriginalColumn, !Current.Previous, - /*InPPDirective=*/false, Encoding, Style); - } - return nullptr; -} - -std::pair<unsigned, bool> -ContinuationIndenter::breakProtrudingToken(const FormatToken &Current, - LineState &State, bool AllowBreak, - bool DryRun, bool Strict) { - std::unique_ptr<const BreakableToken> Token = - createBreakableToken(Current, State, AllowBreak); - if (!Token) - return {0, false}; - assert(Token->getLineCount() > 0); - unsigned ColumnLimit = getColumnLimit(State); - if (Current.is(TT_LineComment)) { - // We don't insert backslashes when breaking line comments. - ColumnLimit = Style.ColumnLimit; - } - if (Current.UnbreakableTailLength >= ColumnLimit) - return {0, false}; - // ColumnWidth was already accounted into State.Column before calling - // breakProtrudingToken. - unsigned StartColumn = State.Column - Current.ColumnWidth; - unsigned NewBreakPenalty = Current.isStringLiteral() - ? Style.PenaltyBreakString - : Style.PenaltyBreakComment; - // Stores whether we intentionally decide to let a line exceed the column - // limit. - bool Exceeded = false; - // Stores whether we introduce a break anywhere in the token. - bool BreakInserted = Token->introducesBreakBeforeToken(); - // Store whether we inserted a new line break at the end of the previous - // logical line. - bool NewBreakBefore = false; - // We use a conservative reflowing strategy. Reflow starts after a line is - // broken or the corresponding whitespace compressed. Reflow ends as soon as a - // line that doesn't get reflown with the previous line is reached. - bool Reflow = false; - // Keep track of where we are in the token: - // Where we are in the content of the current logical line. - unsigned TailOffset = 0; - // The column number we're currently at. - unsigned ContentStartColumn = - Token->getContentStartColumn(0, /*Break=*/false); - // The number of columns left in the current logical line after TailOffset. - unsigned RemainingTokenColumns = - Token->getRemainingLength(0, TailOffset, ContentStartColumn); - // Adapt the start of the token, for example indent. - if (!DryRun) - Token->adaptStartOfLine(0, Whitespaces); - - unsigned ContentIndent = 0; - unsigned Penalty = 0; - LLVM_DEBUG(llvm::dbgs() << "Breaking protruding token at column " - << StartColumn << ".\n"); - for (unsigned LineIndex = 0, EndIndex = Token->getLineCount(); - LineIndex != EndIndex; ++LineIndex) { - LLVM_DEBUG(llvm::dbgs() - << " Line: " << LineIndex << " (Reflow: " << Reflow << ")\n"); - NewBreakBefore = false; - // If we did reflow the previous line, we'll try reflowing again. Otherwise - // we'll start reflowing if the current line is broken or whitespace is - // compressed. - bool TryReflow = Reflow; - // Break the current token until we can fit the rest of the line. - while (ContentStartColumn + RemainingTokenColumns > ColumnLimit) { - LLVM_DEBUG(llvm::dbgs() << " Over limit, need: " - << (ContentStartColumn + RemainingTokenColumns) - << ", space: " << ColumnLimit - << ", reflown prefix: " << ContentStartColumn - << ", offset in line: " << TailOffset << "\n"); - // If the current token doesn't fit, find the latest possible split in the - // current line so that breaking at it will be under the column limit. - // FIXME: Use the earliest possible split while reflowing to correctly - // compress whitespace within a line. - BreakableToken::Split Split = - Token->getSplit(LineIndex, TailOffset, ColumnLimit, - ContentStartColumn, CommentPragmasRegex); - if (Split.first == StringRef::npos) { - // No break opportunity - update the penalty and continue with the next - // logical line. - if (LineIndex < EndIndex - 1) - // The last line's penalty is handled in addNextStateToQueue() or when - // calling replaceWhitespaceAfterLastLine below. - Penalty += Style.PenaltyExcessCharacter * - (ContentStartColumn + RemainingTokenColumns - ColumnLimit); - LLVM_DEBUG(llvm::dbgs() << " No break opportunity.\n"); - break; - } - assert(Split.first != 0); - - if (Token->supportsReflow()) { - // Check whether the next natural split point after the current one can - // still fit the line, either because we can compress away whitespace, - // or because the penalty the excess characters introduce is lower than - // the break penalty. - // We only do this for tokens that support reflowing, and thus allow us - // to change the whitespace arbitrarily (e.g. comments). - // Other tokens, like string literals, can be broken on arbitrary - // positions. - - // First, compute the columns from TailOffset to the next possible split - // position. - // For example: - // ColumnLimit: | - // // Some text that breaks - // ^ tail offset - // ^-- split - // ^-------- to split columns - // ^--- next split - // ^--------------- to next split columns - unsigned ToSplitColumns = Token->getRangeLength( - LineIndex, TailOffset, Split.first, ContentStartColumn); - LLVM_DEBUG(llvm::dbgs() << " ToSplit: " << ToSplitColumns << "\n"); - - BreakableToken::Split NextSplit = Token->getSplit( - LineIndex, TailOffset + Split.first + Split.second, ColumnLimit, - ContentStartColumn + ToSplitColumns + 1, CommentPragmasRegex); - // Compute the columns necessary to fit the next non-breakable sequence - // into the current line. - unsigned ToNextSplitColumns = 0; - if (NextSplit.first == StringRef::npos) { - ToNextSplitColumns = Token->getRemainingLength(LineIndex, TailOffset, - ContentStartColumn); - } else { - ToNextSplitColumns = Token->getRangeLength( - LineIndex, TailOffset, - Split.first + Split.second + NextSplit.first, ContentStartColumn); - } - // Compress the whitespace between the break and the start of the next - // unbreakable sequence. - ToNextSplitColumns = - Token->getLengthAfterCompression(ToNextSplitColumns, Split); - LLVM_DEBUG(llvm::dbgs() - << " ContentStartColumn: " << ContentStartColumn << "\n"); - LLVM_DEBUG(llvm::dbgs() - << " ToNextSplit: " << ToNextSplitColumns << "\n"); - // If the whitespace compression makes us fit, continue on the current - // line. - bool ContinueOnLine = - ContentStartColumn + ToNextSplitColumns <= ColumnLimit; - unsigned ExcessCharactersPenalty = 0; - if (!ContinueOnLine && !Strict) { - // Similarly, if the excess characters' penalty is lower than the - // penalty of introducing a new break, continue on the current line. - ExcessCharactersPenalty = - (ContentStartColumn + ToNextSplitColumns - ColumnLimit) * - Style.PenaltyExcessCharacter; - LLVM_DEBUG(llvm::dbgs() - << " Penalty excess: " << ExcessCharactersPenalty - << "\n break : " << NewBreakPenalty << "\n"); - if (ExcessCharactersPenalty < NewBreakPenalty) { - Exceeded = true; - ContinueOnLine = true; - } - } - if (ContinueOnLine) { - LLVM_DEBUG(llvm::dbgs() << " Continuing on line...\n"); - // The current line fits after compressing the whitespace - reflow - // the next line into it if possible. - TryReflow = true; - if (!DryRun) - Token->compressWhitespace(LineIndex, TailOffset, Split, - Whitespaces); - // When we continue on the same line, leave one space between content. - ContentStartColumn += ToSplitColumns + 1; - Penalty += ExcessCharactersPenalty; - TailOffset += Split.first + Split.second; - RemainingTokenColumns = Token->getRemainingLength( - LineIndex, TailOffset, ContentStartColumn); - continue; - } - } - LLVM_DEBUG(llvm::dbgs() << " Breaking...\n"); - // Update the ContentIndent only if the current line was not reflown with - // the previous line, since in that case the previous line should still - // determine the ContentIndent. Also never intent the last line. - if (!Reflow) - ContentIndent = Token->getContentIndent(LineIndex); - LLVM_DEBUG(llvm::dbgs() - << " ContentIndent: " << ContentIndent << "\n"); - ContentStartColumn = ContentIndent + Token->getContentStartColumn( - LineIndex, /*Break=*/true); - - unsigned NewRemainingTokenColumns = Token->getRemainingLength( - LineIndex, TailOffset + Split.first + Split.second, - ContentStartColumn); - if (NewRemainingTokenColumns == 0) { - // No content to indent. - ContentIndent = 0; - ContentStartColumn = - Token->getContentStartColumn(LineIndex, /*Break=*/true); - NewRemainingTokenColumns = Token->getRemainingLength( - LineIndex, TailOffset + Split.first + Split.second, - ContentStartColumn); - } - - // When breaking before a tab character, it may be moved by a few columns, - // but will still be expanded to the next tab stop, so we don't save any - // columns. - if (NewRemainingTokenColumns == RemainingTokenColumns) { - // FIXME: Do we need to adjust the penalty? - break; - } - assert(NewRemainingTokenColumns < RemainingTokenColumns); - - LLVM_DEBUG(llvm::dbgs() << " Breaking at: " << TailOffset + Split.first - << ", " << Split.second << "\n"); - if (!DryRun) - Token->insertBreak(LineIndex, TailOffset, Split, ContentIndent, - Whitespaces); - - Penalty += NewBreakPenalty; - TailOffset += Split.first + Split.second; - RemainingTokenColumns = NewRemainingTokenColumns; - BreakInserted = true; - NewBreakBefore = true; - } - // In case there's another line, prepare the state for the start of the next - // line. - if (LineIndex + 1 != EndIndex) { - unsigned NextLineIndex = LineIndex + 1; - if (NewBreakBefore) - // After breaking a line, try to reflow the next line into the current - // one once RemainingTokenColumns fits. - TryReflow = true; - if (TryReflow) { - // We decided that we want to try reflowing the next line into the - // current one. - // We will now adjust the state as if the reflow is successful (in - // preparation for the next line), and see whether that works. If we - // decide that we cannot reflow, we will later reset the state to the - // start of the next line. - Reflow = false; - // As we did not continue breaking the line, RemainingTokenColumns is - // known to fit after ContentStartColumn. Adapt ContentStartColumn to - // the position at which we want to format the next line if we do - // actually reflow. - // When we reflow, we need to add a space between the end of the current - // line and the next line's start column. - ContentStartColumn += RemainingTokenColumns + 1; - // Get the split that we need to reflow next logical line into the end - // of the current one; the split will include any leading whitespace of - // the next logical line. - BreakableToken::Split SplitBeforeNext = - Token->getReflowSplit(NextLineIndex, CommentPragmasRegex); - LLVM_DEBUG(llvm::dbgs() - << " Size of reflown text: " << ContentStartColumn - << "\n Potential reflow split: "); - if (SplitBeforeNext.first != StringRef::npos) { - LLVM_DEBUG(llvm::dbgs() << SplitBeforeNext.first << ", " - << SplitBeforeNext.second << "\n"); - TailOffset = SplitBeforeNext.first + SplitBeforeNext.second; - // If the rest of the next line fits into the current line below the - // column limit, we can safely reflow. - RemainingTokenColumns = Token->getRemainingLength( - NextLineIndex, TailOffset, ContentStartColumn); - Reflow = true; - if (ContentStartColumn + RemainingTokenColumns > ColumnLimit) { - LLVM_DEBUG(llvm::dbgs() - << " Over limit after reflow, need: " - << (ContentStartColumn + RemainingTokenColumns) - << ", space: " << ColumnLimit - << ", reflown prefix: " << ContentStartColumn - << ", offset in line: " << TailOffset << "\n"); - // If the whole next line does not fit, try to find a point in - // the next line at which we can break so that attaching the part - // of the next line to that break point onto the current line is - // below the column limit. - BreakableToken::Split Split = - Token->getSplit(NextLineIndex, TailOffset, ColumnLimit, - ContentStartColumn, CommentPragmasRegex); - if (Split.first == StringRef::npos) { - LLVM_DEBUG(llvm::dbgs() << " Did not find later break\n"); - Reflow = false; - } else { - // Check whether the first split point gets us below the column - // limit. Note that we will execute this split below as part of - // the normal token breaking and reflow logic within the line. - unsigned ToSplitColumns = Token->getRangeLength( - NextLineIndex, TailOffset, Split.first, ContentStartColumn); - if (ContentStartColumn + ToSplitColumns > ColumnLimit) { - LLVM_DEBUG(llvm::dbgs() << " Next split protrudes, need: " - << (ContentStartColumn + ToSplitColumns) - << ", space: " << ColumnLimit); - unsigned ExcessCharactersPenalty = - (ContentStartColumn + ToSplitColumns - ColumnLimit) * - Style.PenaltyExcessCharacter; - if (NewBreakPenalty < ExcessCharactersPenalty) { - Reflow = false; - } - } - } - } - } else { - LLVM_DEBUG(llvm::dbgs() << "not found.\n"); - } - } - if (!Reflow) { - // If we didn't reflow into the next line, the only space to consider is - // the next logical line. Reset our state to match the start of the next - // line. - TailOffset = 0; - ContentStartColumn = - Token->getContentStartColumn(NextLineIndex, /*Break=*/false); - RemainingTokenColumns = Token->getRemainingLength( - NextLineIndex, TailOffset, ContentStartColumn); - // Adapt the start of the token, for example indent. - if (!DryRun) - Token->adaptStartOfLine(NextLineIndex, Whitespaces); - } else { - // If we found a reflow split and have added a new break before the next - // line, we are going to remove the line break at the start of the next - // logical line. For example, here we'll add a new line break after - // 'text', and subsequently delete the line break between 'that' and - // 'reflows'. - // // some text that - // // reflows - // -> - // // some text - // // that reflows - // When adding the line break, we also added the penalty for it, so we - // need to subtract that penalty again when we remove the line break due - // to reflowing. - if (NewBreakBefore) { - assert(Penalty >= NewBreakPenalty); - Penalty -= NewBreakPenalty; - } - if (!DryRun) - Token->reflow(NextLineIndex, Whitespaces); - } - } - } - - BreakableToken::Split SplitAfterLastLine = - Token->getSplitAfterLastLine(TailOffset); - if (SplitAfterLastLine.first != StringRef::npos) { - LLVM_DEBUG(llvm::dbgs() << "Replacing whitespace after last line.\n"); - - // We add the last line's penalty here, since that line is going to be split - // now. - Penalty += Style.PenaltyExcessCharacter * - (ContentStartColumn + RemainingTokenColumns - ColumnLimit); - - if (!DryRun) - Token->replaceWhitespaceAfterLastLine(TailOffset, SplitAfterLastLine, - Whitespaces); - ContentStartColumn = - Token->getContentStartColumn(Token->getLineCount() - 1, /*Break=*/true); - RemainingTokenColumns = Token->getRemainingLength( - Token->getLineCount() - 1, - TailOffset + SplitAfterLastLine.first + SplitAfterLastLine.second, - ContentStartColumn); - } - - State.Column = ContentStartColumn + RemainingTokenColumns - - Current.UnbreakableTailLength; - - if (BreakInserted) { - // If we break the token inside a parameter list, we need to break before - // the next parameter on all levels, so that the next parameter is clearly - // visible. Line comments already introduce a break. - if (Current.isNot(TT_LineComment)) { - for (unsigned i = 0, e = State.Stack.size(); i != e; ++i) - State.Stack[i].BreakBeforeParameter = true; - } - - if (Current.is(TT_BlockComment)) - State.NoContinuation = true; - - State.Stack.back().LastSpace = StartColumn; - } - - Token->updateNextToken(State); - - return {Penalty, Exceeded}; -} - -unsigned ContinuationIndenter::getColumnLimit(const LineState &State) const { - // In preprocessor directives reserve two chars for trailing " \" - return Style.ColumnLimit - (State.Line->InPPDirective ? 2 : 0); -} - -bool ContinuationIndenter::nextIsMultilineString(const LineState &State) { - const FormatToken &Current = *State.NextToken; - if (!Current.isStringLiteral() || Current.is(TT_ImplicitStringLiteral)) - return false; - // We never consider raw string literals "multiline" for the purpose of - // AlwaysBreakBeforeMultilineStrings implementation as they are special-cased - // (see TokenAnnotator::mustBreakBefore(). - if (Current.TokenText.startswith("R\"")) - return false; - if (Current.IsMultiline) - return true; - if (Current.getNextNonComment() && - Current.getNextNonComment()->isStringLiteral()) - return true; // Implicit concatenation. - if (Style.ColumnLimit != 0 && Style.BreakStringLiterals && - State.Column + Current.ColumnWidth + Current.UnbreakableTailLength > - Style.ColumnLimit) - return true; // String will be split. - return false; -} - -} // namespace format -} // namespace clang diff --git a/gnu/llvm/tools/clang/lib/Format/ContinuationIndenter.h b/gnu/llvm/tools/clang/lib/Format/ContinuationIndenter.h deleted file mode 100644 index fde89db864b..00000000000 --- a/gnu/llvm/tools/clang/lib/Format/ContinuationIndenter.h +++ /dev/null @@ -1,453 +0,0 @@ -//===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file implements an indenter that manages the indentation of -/// continuations. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H -#define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H - -#include "Encoding.h" -#include "FormatToken.h" -#include "clang/Format/Format.h" -#include "llvm/Support/Regex.h" -#include <map> -#include <tuple> - -namespace clang { -class SourceManager; - -namespace format { - -class AnnotatedLine; -class BreakableToken; -struct FormatToken; -struct LineState; -struct ParenState; -struct RawStringFormatStyleManager; -class WhitespaceManager; - -struct RawStringFormatStyleManager { - llvm::StringMap<FormatStyle> DelimiterStyle; - llvm::StringMap<FormatStyle> EnclosingFunctionStyle; - - RawStringFormatStyleManager(const FormatStyle &CodeStyle); - - llvm::Optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const; - - llvm::Optional<FormatStyle> - getEnclosingFunctionStyle(StringRef EnclosingFunction) const; -}; - -class ContinuationIndenter { -public: - /// Constructs a \c ContinuationIndenter to format \p Line starting in - /// column \p FirstIndent. - ContinuationIndenter(const FormatStyle &Style, - const AdditionalKeywords &Keywords, - const SourceManager &SourceMgr, - WhitespaceManager &Whitespaces, - encoding::Encoding Encoding, - bool BinPackInconclusiveFunctions); - - /// Get the initial state, i.e. the state after placing \p Line's - /// first token at \p FirstIndent. When reformatting a fragment of code, as in - /// the case of formatting inside raw string literals, \p FirstStartColumn is - /// the column at which the state of the parent formatter is. - LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn, - const AnnotatedLine *Line, bool DryRun); - - // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a - // better home. - /// Returns \c true, if a line break after \p State is allowed. - bool canBreak(const LineState &State); - - /// Returns \c true, if a line break after \p State is mandatory. - bool mustBreak(const LineState &State); - - /// Appends the next token to \p State and updates information - /// necessary for indentation. - /// - /// Puts the token on the current line if \p Newline is \c false and adds a - /// line break and necessary indentation otherwise. - /// - /// If \p DryRun is \c false, also creates and stores the required - /// \c Replacement. - unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, - unsigned ExtraSpaces = 0); - - /// Get the column limit for this line. This is the style's column - /// limit, potentially reduced for preprocessor definitions. - unsigned getColumnLimit(const LineState &State) const; - -private: - /// Mark the next token as consumed in \p State and modify its stacks - /// accordingly. - unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline); - - /// Update 'State' according to the next token's fake left parentheses. - void moveStatePastFakeLParens(LineState &State, bool Newline); - /// Update 'State' according to the next token's fake r_parens. - void moveStatePastFakeRParens(LineState &State); - - /// Update 'State' according to the next token being one of "(<{[". - void moveStatePastScopeOpener(LineState &State, bool Newline); - /// Update 'State' according to the next token being one of ")>}]". - void moveStatePastScopeCloser(LineState &State); - /// Update 'State' with the next token opening a nested block. - void moveStateToNewBlock(LineState &State); - - /// Reformats a raw string literal. - /// - /// \returns An extra penalty induced by reformatting the token. - unsigned reformatRawStringLiteral(const FormatToken &Current, - LineState &State, - const FormatStyle &RawStringStyle, - bool DryRun); - - /// If the current token is at the end of the current line, handle - /// the transition to the next line. - unsigned handleEndOfLine(const FormatToken &Current, LineState &State, - bool DryRun, bool AllowBreak); - - /// If \p Current is a raw string that is configured to be reformatted, - /// return the style to be used. - llvm::Optional<FormatStyle> getRawStringStyle(const FormatToken &Current, - const LineState &State); - - /// If the current token sticks out over the end of the line, break - /// it if possible. - /// - /// \returns A pair (penalty, exceeded), where penalty is the extra penalty - /// when tokens are broken or lines exceed the column limit, and exceeded - /// indicates whether the algorithm purposefully left lines exceeding the - /// column limit. - /// - /// The returned penalty will cover the cost of the additional line breaks - /// and column limit violation in all lines except for the last one. The - /// penalty for the column limit violation in the last line (and in single - /// line tokens) is handled in \c addNextStateToQueue. - /// - /// \p Strict indicates whether reflowing is allowed to leave characters - /// protruding the column limit; if true, lines will be split strictly within - /// the column limit where possible; if false, words are allowed to protrude - /// over the column limit as long as the penalty is less than the penalty - /// of a break. - std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current, - LineState &State, - bool AllowBreak, bool DryRun, - bool Strict); - - /// Returns the \c BreakableToken starting at \p Current, or nullptr - /// if the current token cannot be broken. - std::unique_ptr<BreakableToken> - createBreakableToken(const FormatToken &Current, LineState &State, - bool AllowBreak); - - /// Appends the next token to \p State and updates information - /// necessary for indentation. - /// - /// Puts the token on the current line. - /// - /// If \p DryRun is \c false, also creates and stores the required - /// \c Replacement. - void addTokenOnCurrentLine(LineState &State, bool DryRun, - unsigned ExtraSpaces); - - /// Appends the next token to \p State and updates information - /// necessary for indentation. - /// - /// Adds a line break and necessary indentation. - /// - /// If \p DryRun is \c false, also creates and stores the required - /// \c Replacement. - unsigned addTokenOnNewLine(LineState &State, bool DryRun); - - /// Calculate the new column for a line wrap before the next token. - unsigned getNewLineColumn(const LineState &State); - - /// Adds a multiline token to the \p State. - /// - /// \returns Extra penalty for the first line of the literal: last line is - /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't - /// matter, as we don't change them. - unsigned addMultilineToken(const FormatToken &Current, LineState &State); - - /// Returns \c true if the next token starts a multiline string - /// literal. - /// - /// This includes implicitly concatenated strings, strings that will be broken - /// by clang-format and string literals with escaped newlines. - bool nextIsMultilineString(const LineState &State); - - FormatStyle Style; - const AdditionalKeywords &Keywords; - const SourceManager &SourceMgr; - WhitespaceManager &Whitespaces; - encoding::Encoding Encoding; - bool BinPackInconclusiveFunctions; - llvm::Regex CommentPragmasRegex; - const RawStringFormatStyleManager RawStringFormats; -}; - -struct ParenState { - ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace, - bool AvoidBinPacking, bool NoLineBreak) - : Tok(Tok), Indent(Indent), LastSpace(LastSpace), - NestedBlockIndent(Indent), BreakBeforeClosingBrace(false), - AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), - NoLineBreak(NoLineBreak), NoLineBreakInOperand(false), - LastOperatorWrapped(true), ContainsLineBreak(false), - ContainsUnwrappedBuilder(false), AlignColons(true), - ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false), - NestedBlockInlined(false), IsInsideObjCArrayLiteral(false) {} - - /// \brief The token opening this parenthesis level, or nullptr if this level - /// is opened by fake parenthesis. - /// - /// Not considered for memoization as it will always have the same value at - /// the same token. - const FormatToken *Tok; - - /// The position to which a specific parenthesis level needs to be - /// indented. - unsigned Indent; - - /// The position of the last space on each level. - /// - /// Used e.g. to break like: - /// functionCall(Parameter, otherCall( - /// OtherParameter)); - unsigned LastSpace; - - /// If a block relative to this parenthesis level gets wrapped, indent - /// it this much. - unsigned NestedBlockIndent; - - /// The position the first "<<" operator encountered on each level. - /// - /// Used to align "<<" operators. 0 if no such operator has been encountered - /// on a level. - unsigned FirstLessLess = 0; - - /// The column of a \c ? in a conditional expression; - unsigned QuestionColumn = 0; - - /// The position of the colon in an ObjC method declaration/call. - unsigned ColonPos = 0; - - /// The start of the most recent function in a builder-type call. - unsigned StartOfFunctionCall = 0; - - /// Contains the start of array subscript expressions, so that they - /// can be aligned. - unsigned StartOfArraySubscripts = 0; - - /// If a nested name specifier was broken over multiple lines, this - /// contains the start column of the second line. Otherwise 0. - unsigned NestedNameSpecifierContinuation = 0; - - /// If a call expression was broken over multiple lines, this - /// contains the start column of the second line. Otherwise 0. - unsigned CallContinuation = 0; - - /// The column of the first variable name in a variable declaration. - /// - /// Used to align further variables if necessary. - unsigned VariablePos = 0; - - /// Whether a newline needs to be inserted before the block's closing - /// brace. - /// - /// We only want to insert a newline before the closing brace if there also - /// was a newline after the beginning left brace. - bool BreakBeforeClosingBrace : 1; - - /// Avoid bin packing, i.e. multiple parameters/elements on multiple - /// lines, in this context. - bool AvoidBinPacking : 1; - - /// Break after the next comma (or all the commas in this context if - /// \c AvoidBinPacking is \c true). - bool BreakBeforeParameter : 1; - - /// Line breaking in this context would break a formatting rule. - bool NoLineBreak : 1; - - /// Same as \c NoLineBreak, but is restricted until the end of the - /// operand (including the next ","). - bool NoLineBreakInOperand : 1; - - /// True if the last binary operator on this level was wrapped to the - /// next line. - bool LastOperatorWrapped : 1; - - /// \c true if this \c ParenState already contains a line-break. - /// - /// The first line break in a certain \c ParenState causes extra penalty so - /// that clang-format prefers similar breaks, i.e. breaks in the same - /// parenthesis. - bool ContainsLineBreak : 1; - - /// \c true if this \c ParenState contains multiple segments of a - /// builder-type call on one line. - bool ContainsUnwrappedBuilder : 1; - - /// \c true if the colons of the curren ObjC method expression should - /// be aligned. - /// - /// Not considered for memoization as it will always have the same value at - /// the same token. - bool AlignColons : 1; - - /// \c true if at least one selector name was found in the current - /// ObjC method expression. - /// - /// Not considered for memoization as it will always have the same value at - /// the same token. - bool ObjCSelectorNameFound : 1; - - /// \c true if there are multiple nested blocks inside these parens. - /// - /// Not considered for memoization as it will always have the same value at - /// the same token. - bool HasMultipleNestedBlocks : 1; - - /// The start of a nested block (e.g. lambda introducer in C++ or - /// "function" in JavaScript) is not wrapped to a new line. - bool NestedBlockInlined : 1; - - /// \c true if the current \c ParenState represents an Objective-C - /// array literal. - bool IsInsideObjCArrayLiteral : 1; - - bool operator<(const ParenState &Other) const { - if (Indent != Other.Indent) - return Indent < Other.Indent; - if (LastSpace != Other.LastSpace) - return LastSpace < Other.LastSpace; - if (NestedBlockIndent != Other.NestedBlockIndent) - return NestedBlockIndent < Other.NestedBlockIndent; - if (FirstLessLess != Other.FirstLessLess) - return FirstLessLess < Other.FirstLessLess; - if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) - return BreakBeforeClosingBrace; - if (QuestionColumn != Other.QuestionColumn) - return QuestionColumn < Other.QuestionColumn; - if (AvoidBinPacking != Other.AvoidBinPacking) - return AvoidBinPacking; - if (BreakBeforeParameter != Other.BreakBeforeParameter) - return BreakBeforeParameter; - if (NoLineBreak != Other.NoLineBreak) - return NoLineBreak; - if (LastOperatorWrapped != Other.LastOperatorWrapped) - return LastOperatorWrapped; - if (ColonPos != Other.ColonPos) - return ColonPos < Other.ColonPos; - if (StartOfFunctionCall != Other.StartOfFunctionCall) - return StartOfFunctionCall < Other.StartOfFunctionCall; - if (StartOfArraySubscripts != Other.StartOfArraySubscripts) - return StartOfArraySubscripts < Other.StartOfArraySubscripts; - if (CallContinuation != Other.CallContinuation) - return CallContinuation < Other.CallContinuation; - if (VariablePos != Other.VariablePos) - return VariablePos < Other.VariablePos; - if (ContainsLineBreak != Other.ContainsLineBreak) - return ContainsLineBreak; - if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder) - return ContainsUnwrappedBuilder; - if (NestedBlockInlined != Other.NestedBlockInlined) - return NestedBlockInlined; - return false; - } -}; - -/// The current state when indenting a unwrapped line. -/// -/// As the indenting tries different combinations this is copied by value. -struct LineState { - /// The number of used columns in the current line. - unsigned Column; - - /// The token that needs to be next formatted. - FormatToken *NextToken; - - /// \c true if this line contains a continued for-loop section. - bool LineContainsContinuedForLoopSection; - - /// \c true if \p NextToken should not continue this line. - bool NoContinuation; - - /// The \c NestingLevel at the start of this line. - unsigned StartOfLineLevel; - - /// The lowest \c NestingLevel on the current line. - unsigned LowestLevelOnLine; - - /// The start column of the string literal, if we're in a string - /// literal sequence, 0 otherwise. - unsigned StartOfStringLiteral; - - /// A stack keeping track of properties applying to parenthesis - /// levels. - std::vector<ParenState> Stack; - - /// Ignore the stack of \c ParenStates for state comparison. - /// - /// In long and deeply nested unwrapped lines, the current algorithm can - /// be insufficient for finding the best formatting with a reasonable amount - /// of time and memory. Setting this flag will effectively lead to the - /// algorithm not analyzing some combinations. However, these combinations - /// rarely contain the optimal solution: In short, accepting a higher - /// penalty early would need to lead to different values in the \c - /// ParenState stack (in an otherwise identical state) and these different - /// values would need to lead to a significant amount of avoided penalty - /// later. - /// - /// FIXME: Come up with a better algorithm instead. - bool IgnoreStackForComparison; - - /// The indent of the first token. - unsigned FirstIndent; - - /// The line that is being formatted. - /// - /// Does not need to be considered for memoization because it doesn't change. - const AnnotatedLine *Line; - - /// Comparison operator to be able to used \c LineState in \c map. - bool operator<(const LineState &Other) const { - if (NextToken != Other.NextToken) - return NextToken < Other.NextToken; - if (Column != Other.Column) - return Column < Other.Column; - if (LineContainsContinuedForLoopSection != - Other.LineContainsContinuedForLoopSection) - return LineContainsContinuedForLoopSection; - if (NoContinuation != Other.NoContinuation) - return NoContinuation; - if (StartOfLineLevel != Other.StartOfLineLevel) - return StartOfLineLevel < Other.StartOfLineLevel; - if (LowestLevelOnLine != Other.LowestLevelOnLine) - return LowestLevelOnLine < Other.LowestLevelOnLine; - if (StartOfStringLiteral != Other.StartOfStringLiteral) - return StartOfStringLiteral < Other.StartOfStringLiteral; - if (IgnoreStackForComparison || Other.IgnoreStackForComparison) - return false; - return Stack < Other.Stack; - } -}; - -} // end namespace format -} // end namespace clang - -#endif diff --git a/gnu/llvm/tools/clang/lib/Format/Encoding.h b/gnu/llvm/tools/clang/lib/Format/Encoding.h deleted file mode 100644 index 4c877e7e49d..00000000000 --- a/gnu/llvm/tools/clang/lib/Format/Encoding.h +++ /dev/null @@ -1,128 +0,0 @@ -//===--- Encoding.h - Format C++ code ---------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Contains functions for text encoding manipulation. Supports UTF-8, -/// 8-bit encodings and escape sequences in C++ string literals. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LIB_FORMAT_ENCODING_H -#define LLVM_CLANG_LIB_FORMAT_ENCODING_H - -#include "clang/Basic/LLVM.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/ConvertUTF.h" -#include "llvm/Support/Unicode.h" - -namespace clang { -namespace format { -namespace encoding { - -enum Encoding { - Encoding_UTF8, - Encoding_Unknown // We treat all other encodings as 8-bit encodings. -}; - -/// Detects encoding of the Text. If the Text can be decoded using UTF-8, -/// it is considered UTF8, otherwise we treat it as some 8-bit encoding. -inline Encoding detectEncoding(StringRef Text) { - const llvm::UTF8 *Ptr = reinterpret_cast<const llvm::UTF8 *>(Text.begin()); - const llvm::UTF8 *BufEnd = reinterpret_cast<const llvm::UTF8 *>(Text.end()); - if (llvm::isLegalUTF8String(&Ptr, BufEnd)) - return Encoding_UTF8; - return Encoding_Unknown; -} - -/// Returns the number of columns required to display the \p Text on a -/// generic Unicode-capable terminal. Text is assumed to use the specified -/// \p Encoding. -inline unsigned columnWidth(StringRef Text, Encoding Encoding) { - if (Encoding == Encoding_UTF8) { - int ContentWidth = llvm::sys::unicode::columnWidthUTF8(Text); - // FIXME: Figure out the correct way to handle this in the presence of both - // printable and unprintable multi-byte UTF-8 characters. Falling back to - // returning the number of bytes may cause problems, as columnWidth suddenly - // becomes non-additive. - if (ContentWidth >= 0) - return ContentWidth; - } - return Text.size(); -} - -/// Returns the number of columns required to display the \p Text, -/// starting from the \p StartColumn on a terminal with the \p TabWidth. The -/// text is assumed to use the specified \p Encoding. -inline unsigned columnWidthWithTabs(StringRef Text, unsigned StartColumn, - unsigned TabWidth, Encoding Encoding) { - unsigned TotalWidth = 0; - StringRef Tail = Text; - for (;;) { - StringRef::size_type TabPos = Tail.find('\t'); - if (TabPos == StringRef::npos) - return TotalWidth + columnWidth(Tail, Encoding); - TotalWidth += columnWidth(Tail.substr(0, TabPos), Encoding); - TotalWidth += TabWidth - (TotalWidth + StartColumn) % TabWidth; - Tail = Tail.substr(TabPos + 1); - } -} - -/// Gets the number of bytes in a sequence representing a single -/// codepoint and starting with FirstChar in the specified Encoding. -inline unsigned getCodePointNumBytes(char FirstChar, Encoding Encoding) { - switch (Encoding) { - case Encoding_UTF8: - return llvm::getNumBytesForUTF8(FirstChar); - default: - return 1; - } -} - -inline bool isOctDigit(char c) { return '0' <= c && c <= '7'; } - -inline bool isHexDigit(char c) { - return ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || - ('A' <= c && c <= 'F'); -} - -/// Gets the length of an escape sequence inside a C++ string literal. -/// Text should span from the beginning of the escape sequence (starting with a -/// backslash) to the end of the string literal. -inline unsigned getEscapeSequenceLength(StringRef Text) { - assert(Text[0] == '\\'); - if (Text.size() < 2) - return 1; - - switch (Text[1]) { - case 'u': - return 6; - case 'U': - return 10; - case 'x': { - unsigned I = 2; // Point after '\x'. - while (I < Text.size() && isHexDigit(Text[I])) - ++I; - return I; - } - default: - if (isOctDigit(Text[1])) { - unsigned I = 1; - while (I < Text.size() && I < 4 && isOctDigit(Text[I])) - ++I; - return I; - } - return 1 + llvm::getNumBytesForUTF8(Text[1]); - } -} - -} // namespace encoding -} // namespace format -} // namespace clang - -#endif diff --git a/gnu/llvm/tools/clang/lib/Format/Format.cpp b/gnu/llvm/tools/clang/lib/Format/Format.cpp deleted file mode 100644 index 2c4f8760540..00000000000 --- a/gnu/llvm/tools/clang/lib/Format/Format.cpp +++ /dev/null @@ -1,2424 +0,0 @@ -//===--- Format.cpp - Format C++ code -------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file implements functions declared in Format.h. This will be -/// split into separate files as we go. -/// -//===----------------------------------------------------------------------===// - -#include "clang/Format/Format.h" -#include "AffectedRangeManager.h" -#include "ContinuationIndenter.h" -#include "FormatInternal.h" -#include "FormatTokenLexer.h" -#include "NamespaceEndCommentsFixer.h" -#include "SortJavaScriptImports.h" -#include "TokenAnalyzer.h" -#include "TokenAnnotator.h" -#include "UnwrappedLineFormatter.h" -#include "UnwrappedLineParser.h" -#include "UsingDeclarationsSorter.h" -#include "WhitespaceManager.h" -#include "clang/Basic/Diagnostic.h" -#include "clang/Basic/DiagnosticOptions.h" -#include "clang/Basic/SourceManager.h" -#include "clang/Lex/Lexer.h" -#include "clang/Tooling/Inclusions/HeaderIncludes.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/Regex.h" -#include "llvm/Support/VirtualFileSystem.h" -#include "llvm/Support/YAMLTraits.h" -#include <algorithm> -#include <memory> -#include <mutex> -#include <string> -#include <unordered_map> - -#define DEBUG_TYPE "format-formatter" - -using clang::format::FormatStyle; - -LLVM_YAML_IS_SEQUENCE_VECTOR(clang::format::FormatStyle::RawStringFormat) - -namespace llvm { -namespace yaml { -template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> { - static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) { - IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp); - IO.enumCase(Value, "Java", FormatStyle::LK_Java); - IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript); - IO.enumCase(Value, "ObjC", FormatStyle::LK_ObjC); - IO.enumCase(Value, "Proto", FormatStyle::LK_Proto); - IO.enumCase(Value, "TableGen", FormatStyle::LK_TableGen); - IO.enumCase(Value, "TextProto", FormatStyle::LK_TextProto); - } -}; - -template <> struct ScalarEnumerationTraits<FormatStyle::LanguageStandard> { - static void enumeration(IO &IO, FormatStyle::LanguageStandard &Value) { - IO.enumCase(Value, "Cpp03", FormatStyle::LS_Cpp03); - IO.enumCase(Value, "C++03", FormatStyle::LS_Cpp03); - IO.enumCase(Value, "Cpp11", FormatStyle::LS_Cpp11); - IO.enumCase(Value, "C++11", FormatStyle::LS_Cpp11); - IO.enumCase(Value, "Auto", FormatStyle::LS_Auto); - } -}; - -template <> struct ScalarEnumerationTraits<FormatStyle::UseTabStyle> { - static void enumeration(IO &IO, FormatStyle::UseTabStyle &Value) { - IO.enumCase(Value, "Never", FormatStyle::UT_Never); - IO.enumCase(Value, "false", FormatStyle::UT_Never); - IO.enumCase(Value, "Always", FormatStyle::UT_Always); - IO.enumCase(Value, "true", FormatStyle::UT_Always); - IO.enumCase(Value, "ForIndentation", FormatStyle::UT_ForIndentation); - IO.enumCase(Value, "ForContinuationAndIndentation", - FormatStyle::UT_ForContinuationAndIndentation); - } -}; - -template <> struct ScalarEnumerationTraits<FormatStyle::JavaScriptQuoteStyle> { - static void enumeration(IO &IO, FormatStyle::JavaScriptQuoteStyle &Value) { - IO.enumCase(Value, "Leave", FormatStyle::JSQS_Leave); - IO.enumCase(Value, "Single", FormatStyle::JSQS_Single); - IO.enumCase(Value, "Double", FormatStyle::JSQS_Double); - } -}; - -template <> struct ScalarEnumerationTraits<FormatStyle::ShortFunctionStyle> { - static void enumeration(IO &IO, FormatStyle::ShortFunctionStyle &Value) { - IO.enumCase(Value, "None", FormatStyle::SFS_None); - IO.enumCase(Value, "false", FormatStyle::SFS_None); - IO.enumCase(Value, "All", FormatStyle::SFS_All); - IO.enumCase(Value, "true", FormatStyle::SFS_All); - IO.enumCase(Value, "Inline", FormatStyle::SFS_Inline); - IO.enumCase(Value, "InlineOnly", FormatStyle::SFS_InlineOnly); - IO.enumCase(Value, "Empty", FormatStyle::SFS_Empty); - } -}; - -template <> struct ScalarEnumerationTraits<FormatStyle::BinPackStyle> { - static void enumeration(IO &IO, FormatStyle::BinPackStyle &Value) { - IO.enumCase(Value, "Auto", FormatStyle::BPS_Auto); - IO.enumCase(Value, "Always", FormatStyle::BPS_Always); - IO.enumCase(Value, "Never", FormatStyle::BPS_Never); - } -}; - -template <> struct ScalarEnumerationTraits<FormatStyle::BinaryOperatorStyle> { - static void enumeration(IO &IO, FormatStyle::BinaryOperatorStyle &Value) { - IO.enumCase(Value, "All", FormatStyle::BOS_All); - IO.enumCase(Value, "true", FormatStyle::BOS_All); - IO.enumCase(Value, "None", FormatStyle::BOS_None); - IO.enumCase(Value, "false", FormatStyle::BOS_None); - IO.enumCase(Value, "NonAssignment", FormatStyle::BOS_NonAssignment); - } -}; - -template <> struct ScalarEnumerationTraits<FormatStyle::BraceBreakingStyle> { - static void enumeration(IO &IO, FormatStyle::BraceBreakingStyle &Value) { - IO.enumCase(Value, "Attach", FormatStyle::BS_Attach); - IO.enumCase(Value, "Linux", FormatStyle::BS_Linux); - IO.enumCase(Value, "Mozilla", FormatStyle::BS_Mozilla); - IO.enumCase(Value, "Stroustrup", FormatStyle::BS_Stroustrup); - IO.enumCase(Value, "Allman", FormatStyle::BS_Allman); - IO.enumCase(Value, "GNU", FormatStyle::BS_GNU); - IO.enumCase(Value, "WebKit", FormatStyle::BS_WebKit); - IO.enumCase(Value, "Custom", FormatStyle::BS_Custom); - } -}; - -template <> -struct ScalarEnumerationTraits<FormatStyle::BreakConstructorInitializersStyle> { - static void - enumeration(IO &IO, FormatStyle::BreakConstructorInitializersStyle &Value) { - IO.enumCase(Value, "BeforeColon", FormatStyle::BCIS_BeforeColon); - IO.enumCase(Value, "BeforeComma", FormatStyle::BCIS_BeforeComma); - IO.enumCase(Value, "AfterColon", FormatStyle::BCIS_AfterColon); - } -}; - -template <> -struct ScalarEnumerationTraits<FormatStyle::BreakInheritanceListStyle> { - static void - enumeration(IO &IO, FormatStyle::BreakInheritanceListStyle &Value) { - IO.enumCase(Value, "BeforeColon", FormatStyle::BILS_BeforeColon); - IO.enumCase(Value, "BeforeComma", FormatStyle::BILS_BeforeComma); - IO.enumCase(Value, "AfterColon", FormatStyle::BILS_AfterColon); - } -}; - -template <> -struct ScalarEnumerationTraits<FormatStyle::PPDirectiveIndentStyle> { - static void enumeration(IO &IO, FormatStyle::PPDirectiveIndentStyle &Value) { - IO.enumCase(Value, "None", FormatStyle::PPDIS_None); - IO.enumCase(Value, "AfterHash", FormatStyle::PPDIS_AfterHash); - } -}; - -template <> -struct ScalarEnumerationTraits<FormatStyle::ReturnTypeBreakingStyle> { - static void enumeration(IO &IO, FormatStyle::ReturnTypeBreakingStyle &Value) { - IO.enumCase(Value, "None", FormatStyle::RTBS_None); - IO.enumCase(Value, "All", FormatStyle::RTBS_All); - IO.enumCase(Value, "TopLevel", FormatStyle::RTBS_TopLevel); - IO.enumCase(Value, "TopLevelDefinitions", - FormatStyle::RTBS_TopLevelDefinitions); - IO.enumCase(Value, "AllDefinitions", FormatStyle::RTBS_AllDefinitions); - } -}; - -template <> -struct ScalarEnumerationTraits<FormatStyle::BreakTemplateDeclarationsStyle> { - static void enumeration(IO &IO, FormatStyle::BreakTemplateDeclarationsStyle &Value) { - IO.enumCase(Value, "No", FormatStyle::BTDS_No); - IO.enumCase(Value, "MultiLine", FormatStyle::BTDS_MultiLine); - IO.enumCase(Value, "Yes", FormatStyle::BTDS_Yes); - - // For backward compatibility. - IO.enumCase(Value, "false", FormatStyle::BTDS_MultiLine); - IO.enumCase(Value, "true", FormatStyle::BTDS_Yes); - } -}; - -template <> -struct ScalarEnumerationTraits<FormatStyle::DefinitionReturnTypeBreakingStyle> { - static void - enumeration(IO &IO, FormatStyle::DefinitionReturnTypeBreakingStyle &Value) { - IO.enumCase(Value, "None", FormatStyle::DRTBS_None); - IO.enumCase(Value, "All", FormatStyle::DRTBS_All); - IO.enumCase(Value, "TopLevel", FormatStyle::DRTBS_TopLevel); - - // For backward compatibility. - IO.enumCase(Value, "false", FormatStyle::DRTBS_None); - IO.enumCase(Value, "true", FormatStyle::DRTBS_All); - } -}; - -template <> -struct ScalarEnumerationTraits<FormatStyle::NamespaceIndentationKind> { - static void enumeration(IO &IO, - FormatStyle::NamespaceIndentationKind &Value) { - IO.enumCase(Value, "None", FormatStyle::NI_None); - IO.enumCase(Value, "Inner", FormatStyle::NI_Inner); - IO.enumCase(Value, "All", FormatStyle::NI_All); - } -}; - -template <> struct ScalarEnumerationTraits<FormatStyle::BracketAlignmentStyle> { - static void enumeration(IO &IO, FormatStyle::BracketAlignmentStyle &Value) { - IO.enumCase(Value, "Align", FormatStyle::BAS_Align); - IO.enumCase(Value, "DontAlign", FormatStyle::BAS_DontAlign); - IO.enumCase(Value, "AlwaysBreak", FormatStyle::BAS_AlwaysBreak); - - // For backward compatibility. - IO.enumCase(Value, "true", FormatStyle::BAS_Align); - IO.enumCase(Value, "false", FormatStyle::BAS_DontAlign); - } -}; - -template <> -struct ScalarEnumerationTraits<FormatStyle::EscapedNewlineAlignmentStyle> { - static void enumeration(IO &IO, - FormatStyle::EscapedNewlineAlignmentStyle &Value) { - IO.enumCase(Value, "DontAlign", FormatStyle::ENAS_DontAlign); - IO.enumCase(Value, "Left", FormatStyle::ENAS_Left); - IO.enumCase(Value, "Right", FormatStyle::ENAS_Right); - - // For backward compatibility. - IO.enumCase(Value, "true", FormatStyle::ENAS_Left); - IO.enumCase(Value, "false", FormatStyle::ENAS_Right); - } -}; - -template <> struct ScalarEnumerationTraits<FormatStyle::PointerAlignmentStyle> { - static void enumeration(IO &IO, FormatStyle::PointerAlignmentStyle &Value) { - IO.enumCase(Value, "Middle", FormatStyle::PAS_Middle); - IO.enumCase(Value, "Left", FormatStyle::PAS_Left); - IO.enumCase(Value, "Right", FormatStyle::PAS_Right); - - // For backward compatibility. - IO.enumCase(Value, "true", FormatStyle::PAS_Left); - IO.enumCase(Value, "false", FormatStyle::PAS_Right); - } -}; - -template <> -struct ScalarEnumerationTraits<FormatStyle::SpaceBeforeParensOptions> { - static void enumeration(IO &IO, - FormatStyle::SpaceBeforeParensOptions &Value) { - IO.enumCase(Value, "Never", FormatStyle::SBPO_Never); - IO.enumCase(Value, "ControlStatements", - FormatStyle::SBPO_ControlStatements); - IO.enumCase(Value, "Always", FormatStyle::SBPO_Always); - - // For backward compatibility. - IO.enumCase(Value, "false", FormatStyle::SBPO_Never); - IO.enumCase(Value, "true", FormatStyle::SBPO_ControlStatements); - } -}; - -template <> struct MappingTraits<FormatStyle> { - static void mapping(IO &IO, FormatStyle &Style) { - // When reading, read the language first, we need it for getPredefinedStyle. - IO.mapOptional("Language", Style.Language); - - if (IO.outputting()) { - StringRef StylesArray[] = {"LLVM", "Google", "Chromium", - "Mozilla", "WebKit", "GNU"}; - ArrayRef<StringRef> Styles(StylesArray); - for (size_t i = 0, e = Styles.size(); i < e; ++i) { - StringRef StyleName(Styles[i]); - FormatStyle PredefinedStyle; - if (getPredefinedStyle(StyleName, Style.Language, &PredefinedStyle) && - Style == PredefinedStyle) { - IO.mapOptional("# BasedOnStyle", StyleName); - break; - } - } - } else { - StringRef BasedOnStyle; - IO.mapOptional("BasedOnStyle", BasedOnStyle); - if (!BasedOnStyle.empty()) { - FormatStyle::LanguageKind OldLanguage = Style.Language; - FormatStyle::LanguageKind Language = - ((FormatStyle *)IO.getContext())->Language; - if (!getPredefinedStyle(BasedOnStyle, Language, &Style)) { - IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle)); - return; - } - Style.Language = OldLanguage; - } - } - - // For backward compatibility. - if (!IO.outputting()) { - IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlines); - IO.mapOptional("DerivePointerBinding", Style.DerivePointerAlignment); - IO.mapOptional("IndentFunctionDeclarationAfterType", - Style.IndentWrappedFunctionNames); - IO.mapOptional("PointerBindsToType", Style.PointerAlignment); - IO.mapOptional("SpaceAfterControlStatementKeyword", - Style.SpaceBeforeParens); - } - - IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset); - IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket); - IO.mapOptional("AlignConsecutiveAssignments", - Style.AlignConsecutiveAssignments); - IO.mapOptional("AlignConsecutiveDeclarations", - Style.AlignConsecutiveDeclarations); - IO.mapOptional("AlignEscapedNewlines", Style.AlignEscapedNewlines); - IO.mapOptional("AlignOperands", Style.AlignOperands); - IO.mapOptional("AlignTrailingComments", Style.AlignTrailingComments); - IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine", - Style.AllowAllParametersOfDeclarationOnNextLine); - IO.mapOptional("AllowShortBlocksOnASingleLine", - Style.AllowShortBlocksOnASingleLine); - IO.mapOptional("AllowShortCaseLabelsOnASingleLine", - Style.AllowShortCaseLabelsOnASingleLine); - IO.mapOptional("AllowShortFunctionsOnASingleLine", - Style.AllowShortFunctionsOnASingleLine); - IO.mapOptional("AllowShortIfStatementsOnASingleLine", - Style.AllowShortIfStatementsOnASingleLine); - IO.mapOptional("AllowShortLoopsOnASingleLine", - Style.AllowShortLoopsOnASingleLine); - IO.mapOptional("AlwaysBreakAfterDefinitionReturnType", - Style.AlwaysBreakAfterDefinitionReturnType); - IO.mapOptional("AlwaysBreakAfterReturnType", - Style.AlwaysBreakAfterReturnType); - // If AlwaysBreakAfterDefinitionReturnType was specified but - // AlwaysBreakAfterReturnType was not, initialize the latter from the - // former for backwards compatibility. - if (Style.AlwaysBreakAfterDefinitionReturnType != FormatStyle::DRTBS_None && - Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_None) { - if (Style.AlwaysBreakAfterDefinitionReturnType == FormatStyle::DRTBS_All) - Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_AllDefinitions; - else if (Style.AlwaysBreakAfterDefinitionReturnType == - FormatStyle::DRTBS_TopLevel) - Style.AlwaysBreakAfterReturnType = - FormatStyle::RTBS_TopLevelDefinitions; - } - - IO.mapOptional("AlwaysBreakBeforeMultilineStrings", - Style.AlwaysBreakBeforeMultilineStrings); - IO.mapOptional("AlwaysBreakTemplateDeclarations", - Style.AlwaysBreakTemplateDeclarations); - IO.mapOptional("BinPackArguments", Style.BinPackArguments); - IO.mapOptional("BinPackParameters", Style.BinPackParameters); - IO.mapOptional("BraceWrapping", Style.BraceWrapping); - IO.mapOptional("BreakBeforeBinaryOperators", - Style.BreakBeforeBinaryOperators); - IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces); - - bool BreakBeforeInheritanceComma = false; - IO.mapOptional("BreakBeforeInheritanceComma", - BreakBeforeInheritanceComma); - IO.mapOptional("BreakInheritanceList", - Style.BreakInheritanceList); - // If BreakBeforeInheritanceComma was specified but - // BreakInheritance was not, initialize the latter from the - // former for backwards compatibility. - if (BreakBeforeInheritanceComma && - Style.BreakInheritanceList == FormatStyle::BILS_BeforeColon) - Style.BreakInheritanceList = FormatStyle::BILS_BeforeComma; - - IO.mapOptional("BreakBeforeTernaryOperators", - Style.BreakBeforeTernaryOperators); - - bool BreakConstructorInitializersBeforeComma = false; - IO.mapOptional("BreakConstructorInitializersBeforeComma", - BreakConstructorInitializersBeforeComma); - IO.mapOptional("BreakConstructorInitializers", - Style.BreakConstructorInitializers); - // If BreakConstructorInitializersBeforeComma was specified but - // BreakConstructorInitializers was not, initialize the latter from the - // former for backwards compatibility. - if (BreakConstructorInitializersBeforeComma && - Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeColon) - Style.BreakConstructorInitializers = FormatStyle::BCIS_BeforeComma; - - IO.mapOptional("BreakAfterJavaFieldAnnotations", - Style.BreakAfterJavaFieldAnnotations); - IO.mapOptional("BreakStringLiterals", Style.BreakStringLiterals); - IO.mapOptional("ColumnLimit", Style.ColumnLimit); - IO.mapOptional("CommentPragmas", Style.CommentPragmas); - IO.mapOptional("CompactNamespaces", Style.CompactNamespaces); - IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine", - Style.ConstructorInitializerAllOnOneLineOrOnePerLine); - IO.mapOptional("ConstructorInitializerIndentWidth", - Style.ConstructorInitializerIndentWidth); - IO.mapOptional("ContinuationIndentWidth", Style.ContinuationIndentWidth); - IO.mapOptional("Cpp11BracedListStyle", Style.Cpp11BracedListStyle); - IO.mapOptional("DerivePointerAlignment", Style.DerivePointerAlignment); - IO.mapOptional("DisableFormat", Style.DisableFormat); - IO.mapOptional("ExperimentalAutoDetectBinPacking", - Style.ExperimentalAutoDetectBinPacking); - IO.mapOptional("FixNamespaceComments", Style.FixNamespaceComments); - IO.mapOptional("ForEachMacros", Style.ForEachMacros); - IO.mapOptional("IncludeBlocks", Style.IncludeStyle.IncludeBlocks); - IO.mapOptional("IncludeCategories", Style.IncludeStyle.IncludeCategories); - IO.mapOptional("IncludeIsMainRegex", Style.IncludeStyle.IncludeIsMainRegex); - IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels); - IO.mapOptional("IndentPPDirectives", Style.IndentPPDirectives); - IO.mapOptional("IndentWidth", Style.IndentWidth); - IO.mapOptional("IndentWrappedFunctionNames", - Style.IndentWrappedFunctionNames); - IO.mapOptional("JavaImportGroups", Style.JavaImportGroups); - IO.mapOptional("JavaScriptQuotes", Style.JavaScriptQuotes); - IO.mapOptional("JavaScriptWrapImports", Style.JavaScriptWrapImports); - IO.mapOptional("KeepEmptyLinesAtTheStartOfBlocks", - Style.KeepEmptyLinesAtTheStartOfBlocks); - IO.mapOptional("MacroBlockBegin", Style.MacroBlockBegin); - IO.mapOptional("MacroBlockEnd", Style.MacroBlockEnd); - IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep); - IO.mapOptional("NamespaceIndentation", Style.NamespaceIndentation); - IO.mapOptional("ObjCBinPackProtocolList", Style.ObjCBinPackProtocolList); - IO.mapOptional("ObjCBlockIndentWidth", Style.ObjCBlockIndentWidth); - IO.mapOptional("ObjCSpaceAfterProperty", Style.ObjCSpaceAfterProperty); - IO.mapOptional("ObjCSpaceBeforeProtocolList", - Style.ObjCSpaceBeforeProtocolList); - IO.mapOptional("PenaltyBreakAssignment", Style.PenaltyBreakAssignment); - IO.mapOptional("PenaltyBreakBeforeFirstCallParameter", - Style.PenaltyBreakBeforeFirstCallParameter); - IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment); - IO.mapOptional("PenaltyBreakFirstLessLess", - Style.PenaltyBreakFirstLessLess); - IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString); - IO.mapOptional("PenaltyBreakTemplateDeclaration", - Style.PenaltyBreakTemplateDeclaration); - IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter); - IO.mapOptional("PenaltyReturnTypeOnItsOwnLine", - Style.PenaltyReturnTypeOnItsOwnLine); - IO.mapOptional("PointerAlignment", Style.PointerAlignment); - IO.mapOptional("RawStringFormats", Style.RawStringFormats); - IO.mapOptional("ReflowComments", Style.ReflowComments); - IO.mapOptional("SortIncludes", Style.SortIncludes); - IO.mapOptional("SortUsingDeclarations", Style.SortUsingDeclarations); - IO.mapOptional("SpaceAfterCStyleCast", Style.SpaceAfterCStyleCast); - IO.mapOptional("SpaceAfterTemplateKeyword", - Style.SpaceAfterTemplateKeyword); - IO.mapOptional("SpaceBeforeAssignmentOperators", - Style.SpaceBeforeAssignmentOperators); - IO.mapOptional("SpaceBeforeCpp11BracedList", - Style.SpaceBeforeCpp11BracedList); - IO.mapOptional("SpaceBeforeCtorInitializerColon", - Style.SpaceBeforeCtorInitializerColon); - IO.mapOptional("SpaceBeforeInheritanceColon", - Style.SpaceBeforeInheritanceColon); - IO.mapOptional("SpaceBeforeParens", Style.SpaceBeforeParens); - IO.mapOptional("SpaceBeforeRangeBasedForLoopColon", - Style.SpaceBeforeRangeBasedForLoopColon); - IO.mapOptional("SpaceInEmptyParentheses", Style.SpaceInEmptyParentheses); - IO.mapOptional("SpacesBeforeTrailingComments", - Style.SpacesBeforeTrailingComments); - IO.mapOptional("SpacesInAngles", Style.SpacesInAngles); - IO.mapOptional("SpacesInContainerLiterals", - Style.SpacesInContainerLiterals); - IO.mapOptional("SpacesInCStyleCastParentheses", - Style.SpacesInCStyleCastParentheses); - IO.mapOptional("SpacesInParentheses", Style.SpacesInParentheses); - IO.mapOptional("SpacesInSquareBrackets", Style.SpacesInSquareBrackets); - IO.mapOptional("Standard", Style.Standard); - IO.mapOptional("StatementMacros", Style.StatementMacros); - IO.mapOptional("TabWidth", Style.TabWidth); - IO.mapOptional("UseTab", Style.UseTab); - } -}; - -template <> struct MappingTraits<FormatStyle::BraceWrappingFlags> { - static void mapping(IO &IO, FormatStyle::BraceWrappingFlags &Wrapping) { - IO.mapOptional("AfterClass", Wrapping.AfterClass); - IO.mapOptional("AfterControlStatement", Wrapping.AfterControlStatement); - IO.mapOptional("AfterEnum", Wrapping.AfterEnum); - IO.mapOptional("AfterFunction", Wrapping.AfterFunction); - IO.mapOptional("AfterNamespace", Wrapping.AfterNamespace); - IO.mapOptional("AfterObjCDeclaration", Wrapping.AfterObjCDeclaration); - IO.mapOptional("AfterStruct", Wrapping.AfterStruct); - IO.mapOptional("AfterUnion", Wrapping.AfterUnion); - IO.mapOptional("AfterExternBlock", Wrapping.AfterExternBlock); - IO.mapOptional("BeforeCatch", Wrapping.BeforeCatch); - IO.mapOptional("BeforeElse", Wrapping.BeforeElse); - IO.mapOptional("IndentBraces", Wrapping.IndentBraces); - IO.mapOptional("SplitEmptyFunction", Wrapping.SplitEmptyFunction); - IO.mapOptional("SplitEmptyRecord", Wrapping.SplitEmptyRecord); - IO.mapOptional("SplitEmptyNamespace", Wrapping.SplitEmptyNamespace); - } -}; - -template <> struct MappingTraits<FormatStyle::RawStringFormat> { - static void mapping(IO &IO, FormatStyle::RawStringFormat &Format) { - IO.mapOptional("Language", Format.Language); - IO.mapOptional("Delimiters", Format.Delimiters); - IO.mapOptional("EnclosingFunctions", Format.EnclosingFunctions); - IO.mapOptional("CanonicalDelimiter", Format.CanonicalDelimiter); - IO.mapOptional("BasedOnStyle", Format.BasedOnStyle); - } -}; - -// Allows to read vector<FormatStyle> while keeping default values. -// IO.getContext() should contain a pointer to the FormatStyle structure, that -// will be used to get default values for missing keys. -// If the first element has no Language specified, it will be treated as the -// default one for the following elements. -template <> struct DocumentListTraits<std::vector<FormatStyle>> { - static size_t size(IO &IO, std::vector<FormatStyle> &Seq) { - return Seq.size(); - } - static FormatStyle &element(IO &IO, std::vector<FormatStyle> &Seq, - size_t Index) { - if (Index >= Seq.size()) { - assert(Index == Seq.size()); - FormatStyle Template; - if (!Seq.empty() && Seq[0].Language == FormatStyle::LK_None) { - Template = Seq[0]; - } else { - Template = *((const FormatStyle *)IO.getContext()); - Template.Language = FormatStyle::LK_None; - } - Seq.resize(Index + 1, Template); - } - return Seq[Index]; - } -}; -} // namespace yaml -} // namespace llvm - -namespace clang { -namespace format { - -const std::error_category &getParseCategory() { - static const ParseErrorCategory C{}; - return C; -} -std::error_code make_error_code(ParseError e) { - return std::error_code(static_cast<int>(e), getParseCategory()); -} - -inline llvm::Error make_string_error(const llvm::Twine &Message) { - return llvm::make_error<llvm::StringError>(Message, - llvm::inconvertibleErrorCode()); -} - -const char *ParseErrorCategory::name() const noexcept { - return "clang-format.parse_error"; -} - -std::string ParseErrorCategory::message(int EV) const { - switch (static_cast<ParseError>(EV)) { - case ParseError::Success: - return "Success"; - case ParseError::Error: - return "Invalid argument"; - case ParseError::Unsuitable: - return "Unsuitable"; - } - llvm_unreachable("unexpected parse error"); -} - -static FormatStyle expandPresets(const FormatStyle &Style) { - if (Style.BreakBeforeBraces == FormatStyle::BS_Custom) - return Style; - FormatStyle Expanded = Style; - Expanded.BraceWrapping = {false, false, false, false, false, - false, false, false, false, false, - false, false, true, true, true}; - switch (Style.BreakBeforeBraces) { - case FormatStyle::BS_Linux: - Expanded.BraceWrapping.AfterClass = true; - Expanded.BraceWrapping.AfterFunction = true; - Expanded.BraceWrapping.AfterNamespace = true; - break; - case FormatStyle::BS_Mozilla: - Expanded.BraceWrapping.AfterClass = true; - Expanded.BraceWrapping.AfterEnum = true; - Expanded.BraceWrapping.AfterFunction = true; - Expanded.BraceWrapping.AfterStruct = true; - Expanded.BraceWrapping.AfterUnion = true; - Expanded.BraceWrapping.AfterExternBlock = true; - Expanded.BraceWrapping.SplitEmptyFunction = true; - Expanded.BraceWrapping.SplitEmptyRecord = false; - break; - case FormatStyle::BS_Stroustrup: - Expanded.BraceWrapping.AfterFunction = true; - Expanded.BraceWrapping.BeforeCatch = true; - Expanded.BraceWrapping.BeforeElse = true; - break; - case FormatStyle::BS_Allman: - Expanded.BraceWrapping.AfterClass = true; - Expanded.BraceWrapping.AfterControlStatement = true; - Expanded.BraceWrapping.AfterEnum = true; - Expanded.BraceWrapping.AfterFunction = true; - Expanded.BraceWrapping.AfterNamespace = true; - Expanded.BraceWrapping.AfterObjCDeclaration = true; - Expanded.BraceWrapping.AfterStruct = true; - Expanded.BraceWrapping.AfterExternBlock = true; - Expanded.BraceWrapping.BeforeCatch = true; - Expanded.BraceWrapping.BeforeElse = true; - break; - case FormatStyle::BS_GNU: - Expanded.BraceWrapping = {true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true}; - break; - case FormatStyle::BS_WebKit: - Expanded.BraceWrapping.AfterFunction = true; - break; - default: - break; - } - return Expanded; -} - -FormatStyle getLLVMStyle() { - FormatStyle LLVMStyle; - LLVMStyle.Language = FormatStyle::LK_Cpp; - LLVMStyle.AccessModifierOffset = -2; - LLVMStyle.AlignEscapedNewlines = FormatStyle::ENAS_Right; - LLVMStyle.AlignAfterOpenBracket = FormatStyle::BAS_Align; - LLVMStyle.AlignOperands = true; - LLVMStyle.AlignTrailingComments = true; - LLVMStyle.AlignConsecutiveAssignments = false; - LLVMStyle.AlignConsecutiveDeclarations = false; - LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true; - LLVMStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_All; - LLVMStyle.AllowShortBlocksOnASingleLine = false; - LLVMStyle.AllowShortCaseLabelsOnASingleLine = false; - LLVMStyle.AllowShortIfStatementsOnASingleLine = false; - LLVMStyle.AllowShortLoopsOnASingleLine = false; - LLVMStyle.AlwaysBreakAfterReturnType = FormatStyle::RTBS_None; - LLVMStyle.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_None; - LLVMStyle.AlwaysBreakBeforeMultilineStrings = false; - LLVMStyle.AlwaysBreakTemplateDeclarations = FormatStyle::BTDS_MultiLine; - LLVMStyle.BinPackArguments = true; - LLVMStyle.BinPackParameters = true; - LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None; - LLVMStyle.BreakBeforeTernaryOperators = true; - LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach; - LLVMStyle.BraceWrapping = {false, false, false, false, false, - false, false, false, false, false, - false, false, true, true, true}; - LLVMStyle.BreakAfterJavaFieldAnnotations = false; - LLVMStyle.BreakConstructorInitializers = FormatStyle::BCIS_BeforeColon; - LLVMStyle.BreakInheritanceList = FormatStyle::BILS_BeforeColon; - LLVMStyle.BreakStringLiterals = true; - LLVMStyle.ColumnLimit = 80; - LLVMStyle.CommentPragmas = "^ IWYU pragma:"; - LLVMStyle.CompactNamespaces = false; - LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false; - LLVMStyle.ConstructorInitializerIndentWidth = 4; - LLVMStyle.ContinuationIndentWidth = 4; - LLVMStyle.Cpp11BracedListStyle = true; - LLVMStyle.DerivePointerAlignment = false; - LLVMStyle.ExperimentalAutoDetectBinPacking = false; - LLVMStyle.FixNamespaceComments = true; - LLVMStyle.ForEachMacros.push_back("foreach"); - LLVMStyle.ForEachMacros.push_back("Q_FOREACH"); - LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH"); - LLVMStyle.IncludeStyle.IncludeCategories = { - {"^\"(llvm|llvm-c|clang|clang-c)/", 2}, - {"^(<|\"(gtest|gmock|isl|json)/)", 3}, - {".*", 1}}; - LLVMStyle.IncludeStyle.IncludeIsMainRegex = "(Test)?$"; - LLVMStyle.IncludeStyle.IncludeBlocks = tooling::IncludeStyle::IBS_Preserve; - LLVMStyle.IndentCaseLabels = false; - LLVMStyle.IndentPPDirectives = FormatStyle::PPDIS_None; - LLVMStyle.IndentWrappedFunctionNames = false; - LLVMStyle.IndentWidth = 2; - LLVMStyle.JavaScriptQuotes = FormatStyle::JSQS_Leave; - LLVMStyle.JavaScriptWrapImports = true; - LLVMStyle.TabWidth = 8; - LLVMStyle.MaxEmptyLinesToKeep = 1; - LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true; - LLVMStyle.NamespaceIndentation = FormatStyle::NI_None; - LLVMStyle.ObjCBinPackProtocolList = FormatStyle::BPS_Auto; - LLVMStyle.ObjCBlockIndentWidth = 2; - LLVMStyle.ObjCSpaceAfterProperty = false; - LLVMStyle.ObjCSpaceBeforeProtocolList = true; - LLVMStyle.PointerAlignment = FormatStyle::PAS_Right; - LLVMStyle.SpacesBeforeTrailingComments = 1; - LLVMStyle.Standard = FormatStyle::LS_Cpp11; - LLVMStyle.UseTab = FormatStyle::UT_Never; - LLVMStyle.ReflowComments = true; - LLVMStyle.SpacesInParentheses = false; - LLVMStyle.SpacesInSquareBrackets = false; - LLVMStyle.SpaceInEmptyParentheses = false; - LLVMStyle.SpacesInContainerLiterals = true; - LLVMStyle.SpacesInCStyleCastParentheses = false; - LLVMStyle.SpaceAfterCStyleCast = false; - LLVMStyle.SpaceAfterTemplateKeyword = true; - LLVMStyle.SpaceBeforeCtorInitializerColon = true; - LLVMStyle.SpaceBeforeInheritanceColon = true; - LLVMStyle.SpaceBeforeParens = FormatStyle::SBPO_ControlStatements; - LLVMStyle.SpaceBeforeRangeBasedForLoopColon = true; - LLVMStyle.SpaceBeforeAssignmentOperators = true; - LLVMStyle.SpaceBeforeCpp11BracedList = false; - LLVMStyle.SpacesInAngles = false; - - LLVMStyle.PenaltyBreakAssignment = prec::Assignment; - LLVMStyle.PenaltyBreakComment = 300; - LLVMStyle.PenaltyBreakFirstLessLess = 120; - LLVMStyle.PenaltyBreakString = 1000; - LLVMStyle.PenaltyExcessCharacter = 1000000; - LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60; - LLVMStyle.PenaltyBreakBeforeFirstCallParameter = 19; - LLVMStyle.PenaltyBreakTemplateDeclaration = prec::Relational; - - LLVMStyle.DisableFormat = false; - LLVMStyle.SortIncludes = true; - LLVMStyle.SortUsingDeclarations = true; - LLVMStyle.StatementMacros.push_back("Q_UNUSED"); - LLVMStyle.StatementMacros.push_back("QT_REQUIRE_VERSION"); - - return LLVMStyle; -} - -FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { - if (Language == FormatStyle::LK_TextProto) { - FormatStyle GoogleStyle = getGoogleStyle(FormatStyle::LK_Proto); - GoogleStyle.Language = FormatStyle::LK_TextProto; - - return GoogleStyle; - } - - FormatStyle GoogleStyle = getLLVMStyle(); - GoogleStyle.Language = Language; - - GoogleStyle.AccessModifierOffset = -1; - GoogleStyle.AlignEscapedNewlines = FormatStyle::ENAS_Left; - GoogleStyle.AllowShortIfStatementsOnASingleLine = true; - GoogleStyle.AllowShortLoopsOnASingleLine = true; - GoogleStyle.AlwaysBreakBeforeMultilineStrings = true; - GoogleStyle.AlwaysBreakTemplateDeclarations = FormatStyle::BTDS_Yes; - GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; - GoogleStyle.DerivePointerAlignment = true; - GoogleStyle.IncludeStyle.IncludeCategories = { - {"^<ext/.*\\.h>", 2}, {"^<.*\\.h>", 1}, {"^<.*", 2}, {".*", 3}}; - GoogleStyle.IncludeStyle.IncludeIsMainRegex = "([-_](test|unittest))?$"; - GoogleStyle.IndentCaseLabels = true; - GoogleStyle.KeepEmptyLinesAtTheStartOfBlocks = false; - GoogleStyle.ObjCBinPackProtocolList = FormatStyle::BPS_Never; - GoogleStyle.ObjCSpaceAfterProperty = false; - GoogleStyle.ObjCSpaceBeforeProtocolList = true; - GoogleStyle.PointerAlignment = FormatStyle::PAS_Left; - GoogleStyle.RawStringFormats = { - { - FormatStyle::LK_Cpp, - /*Delimiters=*/ - { - "cc", - "CC", - "cpp", - "Cpp", - "CPP", - "c++", - "C++", - }, - /*EnclosingFunctionNames=*/ - {}, - /*CanonicalDelimiter=*/"", - /*BasedOnStyle=*/"google", - }, - { - FormatStyle::LK_TextProto, - /*Delimiters=*/ - { - "pb", - "PB", - "proto", - "PROTO", - }, - /*EnclosingFunctionNames=*/ - { - "EqualsProto", - "EquivToProto", - "PARSE_PARTIAL_TEXT_PROTO", - "PARSE_TEST_PROTO", - "PARSE_TEXT_PROTO", - "ParseTextOrDie", - "ParseTextProtoOrDie", - }, - /*CanonicalDelimiter=*/"", - /*BasedOnStyle=*/"google", - }, - }; - GoogleStyle.SpacesBeforeTrailingComments = 2; - GoogleStyle.Standard = FormatStyle::LS_Auto; - - GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200; - GoogleStyle.PenaltyBreakBeforeFirstCallParameter = 1; - - if (Language == FormatStyle::LK_Java) { - GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; - GoogleStyle.AlignOperands = false; - GoogleStyle.AlignTrailingComments = false; - GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty; - GoogleStyle.AllowShortIfStatementsOnASingleLine = false; - GoogleStyle.AlwaysBreakBeforeMultilineStrings = false; - GoogleStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_NonAssignment; - GoogleStyle.ColumnLimit = 100; - GoogleStyle.SpaceAfterCStyleCast = true; - GoogleStyle.SpacesBeforeTrailingComments = 1; - } else if (Language == FormatStyle::LK_JavaScript) { - GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; - GoogleStyle.AlignOperands = false; - GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty; - GoogleStyle.AlwaysBreakBeforeMultilineStrings = false; - GoogleStyle.BreakBeforeTernaryOperators = false; - // taze:, triple slash directives (`/// <...`), @see, which is commonly - // followed by overlong URLs. - GoogleStyle.CommentPragmas = "(taze:|^/[ \t]*<|@see)"; - GoogleStyle.MaxEmptyLinesToKeep = 3; - GoogleStyle.NamespaceIndentation = FormatStyle::NI_All; - GoogleStyle.SpacesInContainerLiterals = false; - GoogleStyle.JavaScriptQuotes = FormatStyle::JSQS_Single; - GoogleStyle.JavaScriptWrapImports = false; - } else if (Language == FormatStyle::LK_Proto) { - GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty; - GoogleStyle.AlwaysBreakBeforeMultilineStrings = false; - GoogleStyle.SpacesInContainerLiterals = false; - GoogleStyle.Cpp11BracedListStyle = false; - // This affects protocol buffer options specifications and text protos. - // Text protos are currently mostly formatted inside C++ raw string literals - // and often the current breaking behavior of string literals is not - // beneficial there. Investigate turning this on once proper string reflow - // has been implemented. - GoogleStyle.BreakStringLiterals = false; - } else if (Language == FormatStyle::LK_ObjC) { - GoogleStyle.AlwaysBreakBeforeMultilineStrings = false; - GoogleStyle.ColumnLimit = 100; - } - - return GoogleStyle; -} - -FormatStyle getChromiumStyle(FormatStyle::LanguageKind Language) { - FormatStyle ChromiumStyle = getGoogleStyle(Language); - if (Language == FormatStyle::LK_Java) { - ChromiumStyle.AllowShortIfStatementsOnASingleLine = true; - ChromiumStyle.BreakAfterJavaFieldAnnotations = true; - ChromiumStyle.ContinuationIndentWidth = 8; - ChromiumStyle.IndentWidth = 4; - // See styleguide for import groups: - // https://chromium.googlesource.com/chromium/src/+/master/styleguide/java/java.md#Import-Order - ChromiumStyle.JavaImportGroups = { - "android", - "com", - "dalvik", - "junit", - "org", - "com.google.android.apps.chrome", - "org.chromium", - "java", - "javax", - }; - ChromiumStyle.SortIncludes = true; - } else if (Language == FormatStyle::LK_JavaScript) { - ChromiumStyle.AllowShortIfStatementsOnASingleLine = false; - ChromiumStyle.AllowShortLoopsOnASingleLine = false; - } else { - ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false; - ChromiumStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; - ChromiumStyle.AllowShortIfStatementsOnASingleLine = false; - ChromiumStyle.AllowShortLoopsOnASingleLine = false; - ChromiumStyle.BinPackParameters = false; - ChromiumStyle.DerivePointerAlignment = false; - if (Language == FormatStyle::LK_ObjC) - ChromiumStyle.ColumnLimit = 80; - } - return ChromiumStyle; -} - -FormatStyle getMozillaStyle() { - FormatStyle MozillaStyle = getLLVMStyle(); - MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false; - MozillaStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Inline; - MozillaStyle.AlwaysBreakAfterReturnType = FormatStyle::RTBS_TopLevel; - MozillaStyle.AlwaysBreakAfterDefinitionReturnType = - FormatStyle::DRTBS_TopLevel; - MozillaStyle.AlwaysBreakTemplateDeclarations = FormatStyle::BTDS_Yes; - MozillaStyle.BinPackParameters = false; - MozillaStyle.BinPackArguments = false; - MozillaStyle.BreakBeforeBraces = FormatStyle::BS_Mozilla; - MozillaStyle.BreakConstructorInitializers = FormatStyle::BCIS_BeforeComma; - MozillaStyle.BreakInheritanceList = FormatStyle::BILS_BeforeComma; - MozillaStyle.ConstructorInitializerIndentWidth = 2; - MozillaStyle.ContinuationIndentWidth = 2; - MozillaStyle.Cpp11BracedListStyle = false; - MozillaStyle.FixNamespaceComments = false; - MozillaStyle.IndentCaseLabels = true; - MozillaStyle.ObjCSpaceAfterProperty = true; - MozillaStyle.ObjCSpaceBeforeProtocolList = false; - MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200; - MozillaStyle.PointerAlignment = FormatStyle::PAS_Left; - MozillaStyle.SpaceAfterTemplateKeyword = false; - return MozillaStyle; -} - -FormatStyle getWebKitStyle() { - FormatStyle Style = getLLVMStyle(); - Style.AccessModifierOffset = -4; - Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; - Style.AlignOperands = false; - Style.AlignTrailingComments = false; - Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All; - Style.BreakBeforeBraces = FormatStyle::BS_WebKit; - Style.BreakConstructorInitializers = FormatStyle::BCIS_BeforeComma; - Style.Cpp11BracedListStyle = false; - Style.ColumnLimit = 0; - Style.FixNamespaceComments = false; - Style.IndentWidth = 4; - Style.NamespaceIndentation = FormatStyle::NI_Inner; - Style.ObjCBlockIndentWidth = 4; - Style.ObjCSpaceAfterProperty = true; - Style.PointerAlignment = FormatStyle::PAS_Left; - Style.SpaceBeforeCpp11BracedList = true; - return Style; -} - -FormatStyle getGNUStyle() { - FormatStyle Style = getLLVMStyle(); - Style.AlwaysBreakAfterDefinitionReturnType = FormatStyle::DRTBS_All; - Style.AlwaysBreakAfterReturnType = FormatStyle::RTBS_AllDefinitions; - Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All; - Style.BreakBeforeBraces = FormatStyle::BS_GNU; - Style.BreakBeforeTernaryOperators = true; - Style.Cpp11BracedListStyle = false; - Style.ColumnLimit = 79; - Style.FixNamespaceComments = false; - Style.SpaceBeforeParens = FormatStyle::SBPO_Always; - Style.Standard = FormatStyle::LS_Cpp03; - return Style; -} - -FormatStyle getNoStyle() { - FormatStyle NoStyle = getLLVMStyle(); - NoStyle.DisableFormat = true; - NoStyle.SortIncludes = false; - NoStyle.SortUsingDeclarations = false; - return NoStyle; -} - -bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language, - FormatStyle *Style) { - if (Name.equals_lower("llvm")) { - *Style = getLLVMStyle(); - } else if (Name.equals_lower("chromium")) { - *Style = getChromiumStyle(Language); - } else if (Name.equals_lower("mozilla")) { - *Style = getMozillaStyle(); - } else if (Name.equals_lower("google")) { - *Style = getGoogleStyle(Language); - } else if (Name.equals_lower("webkit")) { - *Style = getWebKitStyle(); - } else if (Name.equals_lower("gnu")) { - *Style = getGNUStyle(); - } else if (Name.equals_lower("none")) { - *Style = getNoStyle(); - } else { - return false; - } - - Style->Language = Language; - return true; -} - -std::error_code parseConfiguration(StringRef Text, FormatStyle *Style) { - assert(Style); - FormatStyle::LanguageKind Language = Style->Language; - assert(Language != FormatStyle::LK_None); - if (Text.trim().empty()) - return make_error_code(ParseError::Error); - Style->StyleSet.Clear(); - std::vector<FormatStyle> Styles; - llvm::yaml::Input Input(Text); - // DocumentListTraits<vector<FormatStyle>> uses the context to get default - // values for the fields, keys for which are missing from the configuration. - // Mapping also uses the context to get the language to find the correct - // base style. - Input.setContext(Style); - Input >> Styles; - if (Input.error()) - return Input.error(); - - for (unsigned i = 0; i < Styles.size(); ++i) { - // Ensures that only the first configuration can skip the Language option. - if (Styles[i].Language == FormatStyle::LK_None && i != 0) - return make_error_code(ParseError::Error); - // Ensure that each language is configured at most once. - for (unsigned j = 0; j < i; ++j) { - if (Styles[i].Language == Styles[j].Language) { - LLVM_DEBUG(llvm::dbgs() - << "Duplicate languages in the config file on positions " - << j << " and " << i << "\n"); - return make_error_code(ParseError::Error); - } - } - } - // Look for a suitable configuration starting from the end, so we can - // find the configuration for the specific language first, and the default - // configuration (which can only be at slot 0) after it. - FormatStyle::FormatStyleSet StyleSet; - bool LanguageFound = false; - for (int i = Styles.size() - 1; i >= 0; --i) { - if (Styles[i].Language != FormatStyle::LK_None) - StyleSet.Add(Styles[i]); - if (Styles[i].Language == Language) - LanguageFound = true; - } - if (!LanguageFound) { - if (Styles.empty() || Styles[0].Language != FormatStyle::LK_None) - return make_error_code(ParseError::Unsuitable); - FormatStyle DefaultStyle = Styles[0]; - DefaultStyle.Language = Language; - StyleSet.Add(std::move(DefaultStyle)); - } - *Style = *StyleSet.Get(Language); - return make_error_code(ParseError::Success); -} - -std::string configurationAsText(const FormatStyle &Style) { - std::string Text; - llvm::raw_string_ostream Stream(Text); - llvm::yaml::Output Output(Stream); - // We use the same mapping method for input and output, so we need a non-const - // reference here. - FormatStyle NonConstStyle = expandPresets(Style); - Output << NonConstStyle; - return Stream.str(); -} - -llvm::Optional<FormatStyle> -FormatStyle::FormatStyleSet::Get(FormatStyle::LanguageKind Language) const { - if (!Styles) - return None; - auto It = Styles->find(Language); - if (It == Styles->end()) - return None; - FormatStyle Style = It->second; - Style.StyleSet = *this; - return Style; -} - -void FormatStyle::FormatStyleSet::Add(FormatStyle Style) { - assert(Style.Language != LK_None && - "Cannot add a style for LK_None to a StyleSet"); - assert( - !Style.StyleSet.Styles && - "Cannot add a style associated with an existing StyleSet to a StyleSet"); - if (!Styles) - Styles = std::make_shared<MapType>(); - (*Styles)[Style.Language] = std::move(Style); -} - -void FormatStyle::FormatStyleSet::Clear() { - Styles.reset(); -} - -llvm::Optional<FormatStyle> -FormatStyle::GetLanguageStyle(FormatStyle::LanguageKind Language) const { - return StyleSet.Get(Language); -} - -namespace { - -class JavaScriptRequoter : public TokenAnalyzer { -public: - JavaScriptRequoter(const Environment &Env, const FormatStyle &Style) - : TokenAnalyzer(Env, Style) {} - - std::pair<tooling::Replacements, unsigned> - analyze(TokenAnnotator &Annotator, - SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, - FormatTokenLexer &Tokens) override { - AffectedRangeMgr.computeAffectedLines(AnnotatedLines); - tooling::Replacements Result; - requoteJSStringLiteral(AnnotatedLines, Result); - return {Result, 0}; - } - -private: - // Replaces double/single-quoted string literal as appropriate, re-escaping - // the contents in the process. - void requoteJSStringLiteral(SmallVectorImpl<AnnotatedLine *> &Lines, - tooling::Replacements &Result) { - for (AnnotatedLine *Line : Lines) { - requoteJSStringLiteral(Line->Children, Result); - if (!Line->Affected) - continue; - for (FormatToken *FormatTok = Line->First; FormatTok; - FormatTok = FormatTok->Next) { - StringRef Input = FormatTok->TokenText; - if (FormatTok->Finalized || !FormatTok->isStringLiteral() || - // NB: testing for not starting with a double quote to avoid - // breaking `template strings`. - (Style.JavaScriptQuotes == FormatStyle::JSQS_Single && - !Input.startswith("\"")) || - (Style.JavaScriptQuotes == FormatStyle::JSQS_Double && - !Input.startswith("\'"))) - continue; - - // Change start and end quote. - bool IsSingle = Style.JavaScriptQuotes == FormatStyle::JSQS_Single; - SourceLocation Start = FormatTok->Tok.getLocation(); - auto Replace = [&](SourceLocation Start, unsigned Length, - StringRef ReplacementText) { - auto Err = Result.add(tooling::Replacement( - Env.getSourceManager(), Start, Length, ReplacementText)); - // FIXME: handle error. For now, print error message and skip the - // replacement for release version. - if (Err) { - llvm::errs() << llvm::toString(std::move(Err)) << "\n"; - assert(false); - } - }; - Replace(Start, 1, IsSingle ? "'" : "\""); - Replace(FormatTok->Tok.getEndLoc().getLocWithOffset(-1), 1, - IsSingle ? "'" : "\""); - - // Escape internal quotes. - bool Escaped = false; - for (size_t i = 1; i < Input.size() - 1; i++) { - switch (Input[i]) { - case '\\': - if (!Escaped && i + 1 < Input.size() && - ((IsSingle && Input[i + 1] == '"') || - (!IsSingle && Input[i + 1] == '\''))) { - // Remove this \, it's escaping a " or ' that no longer needs - // escaping - Replace(Start.getLocWithOffset(i), 1, ""); - continue; - } - Escaped = !Escaped; - break; - case '\"': - case '\'': - if (!Escaped && IsSingle == (Input[i] == '\'')) { - // Escape the quote. - Replace(Start.getLocWithOffset(i), 0, "\\"); - } - Escaped = false; - break; - default: - Escaped = false; - break; - } - } - } - } - } -}; - -class Formatter : public TokenAnalyzer { -public: - Formatter(const Environment &Env, const FormatStyle &Style, - FormattingAttemptStatus *Status) - : TokenAnalyzer(Env, Style), Status(Status) {} - - std::pair<tooling::Replacements, unsigned> - analyze(TokenAnnotator &Annotator, - SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, - FormatTokenLexer &Tokens) override { - tooling::Replacements Result; - deriveLocalStyle(AnnotatedLines); - AffectedRangeMgr.computeAffectedLines(AnnotatedLines); - for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { - Annotator.calculateFormattingInformation(*AnnotatedLines[i]); - } - Annotator.setCommentLineLevels(AnnotatedLines); - - WhitespaceManager Whitespaces( - Env.getSourceManager(), Style, - inputUsesCRLF(Env.getSourceManager().getBufferData(Env.getFileID()))); - ContinuationIndenter Indenter(Style, Tokens.getKeywords(), - Env.getSourceManager(), Whitespaces, Encoding, - BinPackInconclusiveFunctions); - unsigned Penalty = - UnwrappedLineFormatter(&Indenter, &Whitespaces, Style, - Tokens.getKeywords(), Env.getSourceManager(), - Status) - .format(AnnotatedLines, /*DryRun=*/false, - /*AdditionalIndent=*/0, - /*FixBadIndentation=*/false, - /*FirstStartColumn=*/Env.getFirstStartColumn(), - /*NextStartColumn=*/Env.getNextStartColumn(), - /*LastStartColumn=*/Env.getLastStartColumn()); - for (const auto &R : Whitespaces.generateReplacements()) - if (Result.add(R)) - return std::make_pair(Result, 0); - return std::make_pair(Result, Penalty); - } - -private: - static bool inputUsesCRLF(StringRef Text) { - return Text.count('\r') * 2 > Text.count('\n'); - } - - bool - hasCpp03IncompatibleFormat(const SmallVectorImpl<AnnotatedLine *> &Lines) { - for (const AnnotatedLine *Line : Lines) { - if (hasCpp03IncompatibleFormat(Line->Children)) - return true; - for (FormatToken *Tok = Line->First->Next; Tok; Tok = Tok->Next) { - if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) { - if (Tok->is(tok::coloncolon) && Tok->Previous->is(TT_TemplateOpener)) - return true; - if (Tok->is(TT_TemplateCloser) && - Tok->Previous->is(TT_TemplateCloser)) - return true; - } - } - } - return false; - } - - int countVariableAlignments(const SmallVectorImpl<AnnotatedLine *> &Lines) { - int AlignmentDiff = 0; - for (const AnnotatedLine *Line : Lines) { - AlignmentDiff += countVariableAlignments(Line->Children); - for (FormatToken *Tok = Line->First; Tok && Tok->Next; Tok = Tok->Next) { - if (!Tok->is(TT_PointerOrReference)) - continue; - bool SpaceBefore = - Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd(); - bool SpaceAfter = Tok->Next->WhitespaceRange.getBegin() != - Tok->Next->WhitespaceRange.getEnd(); - if (SpaceBefore && !SpaceAfter) - ++AlignmentDiff; - if (!SpaceBefore && SpaceAfter) - --AlignmentDiff; - } - } - return AlignmentDiff; - } - - void - deriveLocalStyle(const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { - bool HasBinPackedFunction = false; - bool HasOnePerLineFunction = false; - for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { - if (!AnnotatedLines[i]->First->Next) - continue; - FormatToken *Tok = AnnotatedLines[i]->First->Next; - while (Tok->Next) { - if (Tok->PackingKind == PPK_BinPacked) - HasBinPackedFunction = true; - if (Tok->PackingKind == PPK_OnePerLine) - HasOnePerLineFunction = true; - - Tok = Tok->Next; - } - } - if (Style.DerivePointerAlignment) - Style.PointerAlignment = countVariableAlignments(AnnotatedLines) <= 0 - ? FormatStyle::PAS_Left - : FormatStyle::PAS_Right; - if (Style.Standard == FormatStyle::LS_Auto) - Style.Standard = hasCpp03IncompatibleFormat(AnnotatedLines) - ? FormatStyle::LS_Cpp11 - : FormatStyle::LS_Cpp03; - BinPackInconclusiveFunctions = - HasBinPackedFunction || !HasOnePerLineFunction; - } - - bool BinPackInconclusiveFunctions; - FormattingAttemptStatus *Status; -}; - -// This class clean up the erroneous/redundant code around the given ranges in -// file. -class Cleaner : public TokenAnalyzer { -public: - Cleaner(const Environment &Env, const FormatStyle &Style) - : TokenAnalyzer(Env, Style), - DeletedTokens(FormatTokenLess(Env.getSourceManager())) {} - - // FIXME: eliminate unused parameters. - std::pair<tooling::Replacements, unsigned> - analyze(TokenAnnotator &Annotator, - SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, - FormatTokenLexer &Tokens) override { - // FIXME: in the current implementation the granularity of affected range - // is an annotated line. However, this is not sufficient. Furthermore, - // redundant code introduced by replacements does not necessarily - // intercept with ranges of replacements that result in the redundancy. - // To determine if some redundant code is actually introduced by - // replacements(e.g. deletions), we need to come up with a more - // sophisticated way of computing affected ranges. - AffectedRangeMgr.computeAffectedLines(AnnotatedLines); - - checkEmptyNamespace(AnnotatedLines); - - for (auto &Line : AnnotatedLines) { - if (Line->Affected) { - cleanupRight(Line->First, tok::comma, tok::comma); - cleanupRight(Line->First, TT_CtorInitializerColon, tok::comma); - cleanupRight(Line->First, tok::l_paren, tok::comma); - cleanupLeft(Line->First, tok::comma, tok::r_paren); - cleanupLeft(Line->First, TT_CtorInitializerComma, tok::l_brace); - cleanupLeft(Line->First, TT_CtorInitializerColon, tok::l_brace); - cleanupLeft(Line->First, TT_CtorInitializerColon, tok::equal); - } - } - - return {generateFixes(), 0}; - } - -private: - bool containsOnlyComments(const AnnotatedLine &Line) { - for (FormatToken *Tok = Line.First; Tok != nullptr; Tok = Tok->Next) { - if (Tok->isNot(tok::comment)) - return false; - } - return true; - } - - // Iterate through all lines and remove any empty (nested) namespaces. - void checkEmptyNamespace(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { - std::set<unsigned> DeletedLines; - for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { - auto &Line = *AnnotatedLines[i]; - if (Line.startsWithNamespace()) { - checkEmptyNamespace(AnnotatedLines, i, i, DeletedLines); - } - } - - for (auto Line : DeletedLines) { - FormatToken *Tok = AnnotatedLines[Line]->First; - while (Tok) { - deleteToken(Tok); - Tok = Tok->Next; - } - } - } - - // The function checks if the namespace, which starts from \p CurrentLine, and - // its nested namespaces are empty and delete them if they are empty. It also - // sets \p NewLine to the last line checked. - // Returns true if the current namespace is empty. - bool checkEmptyNamespace(SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, - unsigned CurrentLine, unsigned &NewLine, - std::set<unsigned> &DeletedLines) { - unsigned InitLine = CurrentLine, End = AnnotatedLines.size(); - if (Style.BraceWrapping.AfterNamespace) { - // If the left brace is in a new line, we should consume it first so that - // it does not make the namespace non-empty. - // FIXME: error handling if there is no left brace. - if (!AnnotatedLines[++CurrentLine]->startsWith(tok::l_brace)) { - NewLine = CurrentLine; - return false; - } - } else if (!AnnotatedLines[CurrentLine]->endsWith(tok::l_brace)) { - return false; - } - while (++CurrentLine < End) { - if (AnnotatedLines[CurrentLine]->startsWith(tok::r_brace)) - break; - - if (AnnotatedLines[CurrentLine]->startsWithNamespace()) { - if (!checkEmptyNamespace(AnnotatedLines, CurrentLine, NewLine, - DeletedLines)) - return false; - CurrentLine = NewLine; - continue; - } - - if (containsOnlyComments(*AnnotatedLines[CurrentLine])) - continue; - - // If there is anything other than comments or nested namespaces in the - // current namespace, the namespace cannot be empty. - NewLine = CurrentLine; - return false; - } - - NewLine = CurrentLine; - if (CurrentLine >= End) - return false; - - // Check if the empty namespace is actually affected by changed ranges. - if (!AffectedRangeMgr.affectsCharSourceRange(CharSourceRange::getCharRange( - AnnotatedLines[InitLine]->First->Tok.getLocation(), - AnnotatedLines[CurrentLine]->Last->Tok.getEndLoc()))) - return false; - - for (unsigned i = InitLine; i <= CurrentLine; ++i) { - DeletedLines.insert(i); - } - - return true; - } - - // Checks pairs {start, start->next},..., {end->previous, end} and deletes one - // of the token in the pair if the left token has \p LK token kind and the - // right token has \p RK token kind. If \p DeleteLeft is true, the left token - // is deleted on match; otherwise, the right token is deleted. - template <typename LeftKind, typename RightKind> - void cleanupPair(FormatToken *Start, LeftKind LK, RightKind RK, - bool DeleteLeft) { - auto NextNotDeleted = [this](const FormatToken &Tok) -> FormatToken * { - for (auto *Res = Tok.Next; Res; Res = Res->Next) - if (!Res->is(tok::comment) && - DeletedTokens.find(Res) == DeletedTokens.end()) - return Res; - return nullptr; - }; - for (auto *Left = Start; Left;) { - auto *Right = NextNotDeleted(*Left); - if (!Right) - break; - if (Left->is(LK) && Right->is(RK)) { - deleteToken(DeleteLeft ? Left : Right); - for (auto *Tok = Left->Next; Tok && Tok != Right; Tok = Tok->Next) - deleteToken(Tok); - // If the right token is deleted, we should keep the left token - // unchanged and pair it with the new right token. - if (!DeleteLeft) - continue; - } - Left = Right; - } - } - - template <typename LeftKind, typename RightKind> - void cleanupLeft(FormatToken *Start, LeftKind LK, RightKind RK) { - cleanupPair(Start, LK, RK, /*DeleteLeft=*/true); - } - - template <typename LeftKind, typename RightKind> - void cleanupRight(FormatToken *Start, LeftKind LK, RightKind RK) { - cleanupPair(Start, LK, RK, /*DeleteLeft=*/false); - } - - // Delete the given token. - inline void deleteToken(FormatToken *Tok) { - if (Tok) - DeletedTokens.insert(Tok); - } - - tooling::Replacements generateFixes() { - tooling::Replacements Fixes; - std::vector<FormatToken *> Tokens; - std::copy(DeletedTokens.begin(), DeletedTokens.end(), - std::back_inserter(Tokens)); - - // Merge multiple continuous token deletions into one big deletion so that - // the number of replacements can be reduced. This makes computing affected - // ranges more efficient when we run reformat on the changed code. - unsigned Idx = 0; - while (Idx < Tokens.size()) { - unsigned St = Idx, End = Idx; - while ((End + 1) < Tokens.size() && - Tokens[End]->Next == Tokens[End + 1]) { - End++; - } - auto SR = CharSourceRange::getCharRange(Tokens[St]->Tok.getLocation(), - Tokens[End]->Tok.getEndLoc()); - auto Err = - Fixes.add(tooling::Replacement(Env.getSourceManager(), SR, "")); - // FIXME: better error handling. for now just print error message and skip - // for the release version. - if (Err) { - llvm::errs() << llvm::toString(std::move(Err)) << "\n"; - assert(false && "Fixes must not conflict!"); - } - Idx = End + 1; - } - - return Fixes; - } - - // Class for less-than inequality comparason for the set `RedundantTokens`. - // We store tokens in the order they appear in the translation unit so that - // we do not need to sort them in `generateFixes()`. - struct FormatTokenLess { - FormatTokenLess(const SourceManager &SM) : SM(SM) {} - - bool operator()(const FormatToken *LHS, const FormatToken *RHS) const { - return SM.isBeforeInTranslationUnit(LHS->Tok.getLocation(), - RHS->Tok.getLocation()); - } - const SourceManager &SM; - }; - - // Tokens to be deleted. - std::set<FormatToken *, FormatTokenLess> DeletedTokens; -}; - -class ObjCHeaderStyleGuesser : public TokenAnalyzer { -public: - ObjCHeaderStyleGuesser(const Environment &Env, const FormatStyle &Style) - : TokenAnalyzer(Env, Style), IsObjC(false) {} - - std::pair<tooling::Replacements, unsigned> - analyze(TokenAnnotator &Annotator, - SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, - FormatTokenLexer &Tokens) override { - assert(Style.Language == FormatStyle::LK_Cpp); - IsObjC = guessIsObjC(Env.getSourceManager(), AnnotatedLines, - Tokens.getKeywords()); - tooling::Replacements Result; - return {Result, 0}; - } - - bool isObjC() { return IsObjC; } - -private: - static bool - guessIsObjC(const SourceManager &SourceManager, - const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, - const AdditionalKeywords &Keywords) { - // Keep this array sorted, since we are binary searching over it. - static constexpr llvm::StringLiteral FoundationIdentifiers[] = { - "CGFloat", - "CGPoint", - "CGPointMake", - "CGPointZero", - "CGRect", - "CGRectEdge", - "CGRectInfinite", - "CGRectMake", - "CGRectNull", - "CGRectZero", - "CGSize", - "CGSizeMake", - "CGVector", - "CGVectorMake", - "NSAffineTransform", - "NSArray", - "NSAttributedString", - "NSBlockOperation", - "NSBundle", - "NSCache", - "NSCalendar", - "NSCharacterSet", - "NSCountedSet", - "NSData", - "NSDataDetector", - "NSDecimal", - "NSDecimalNumber", - "NSDictionary", - "NSEdgeInsets", - "NSHashTable", - "NSIndexPath", - "NSIndexSet", - "NSInteger", - "NSInvocationOperation", - "NSLocale", - "NSMapTable", - "NSMutableArray", - "NSMutableAttributedString", - "NSMutableCharacterSet", - "NSMutableData", - "NSMutableDictionary", - "NSMutableIndexSet", - "NSMutableOrderedSet", - "NSMutableSet", - "NSMutableString", - "NSNumber", - "NSNumberFormatter", - "NSObject", - "NSOperation", - "NSOperationQueue", - "NSOperationQueuePriority", - "NSOrderedSet", - "NSPoint", - "NSPointerArray", - "NSQualityOfService", - "NSRange", - "NSRect", - "NSRegularExpression", - "NSSet", - "NSSize", - "NSString", - "NSTimeZone", - "NSUInteger", - "NSURL", - "NSURLComponents", - "NSURLQueryItem", - "NSUUID", - "NSValue", - "UIImage", - "UIView", - }; - - for (auto Line : AnnotatedLines) { - for (const FormatToken *FormatTok = Line->First; FormatTok; - FormatTok = FormatTok->Next) { - if ((FormatTok->Previous && FormatTok->Previous->is(tok::at) && - (FormatTok->Tok.getObjCKeywordID() != tok::objc_not_keyword || - FormatTok->isOneOf(tok::numeric_constant, tok::l_square, - tok::l_brace))) || - (FormatTok->Tok.isAnyIdentifier() && - std::binary_search(std::begin(FoundationIdentifiers), - std::end(FoundationIdentifiers), - FormatTok->TokenText)) || - FormatTok->is(TT_ObjCStringLiteral) || - FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, - TT_ObjCBlockLBrace, TT_ObjCBlockLParen, - TT_ObjCDecl, TT_ObjCForIn, TT_ObjCMethodExpr, - TT_ObjCMethodSpecifier, TT_ObjCProperty)) { - LLVM_DEBUG(llvm::dbgs() - << "Detected ObjC at location " - << FormatTok->Tok.getLocation().printToString( - SourceManager) - << " token: " << FormatTok->TokenText << " token type: " - << getTokenTypeName(FormatTok->Type) << "\n"); - return true; - } - if (guessIsObjC(SourceManager, Line->Children, Keywords)) - return true; - } - } - return false; - } - - bool IsObjC; -}; - -struct IncludeDirective { - StringRef Filename; - StringRef Text; - unsigned Offset; - int Category; -}; - -struct JavaImportDirective { - StringRef Identifier; - StringRef Text; - unsigned Offset; - std::vector<StringRef> AssociatedCommentLines; - bool IsStatic; -}; - -} // end anonymous namespace - -// Determines whether 'Ranges' intersects with ('Start', 'End'). -static bool affectsRange(ArrayRef<tooling::Range> Ranges, unsigned Start, - unsigned End) { - for (auto Range : Ranges) { - if (Range.getOffset() < End && - Range.getOffset() + Range.getLength() > Start) - return true; - } - return false; -} - -// Returns a pair (Index, OffsetToEOL) describing the position of the cursor -// before sorting/deduplicating. Index is the index of the include under the -// cursor in the original set of includes. If this include has duplicates, it is -// the index of the first of the duplicates as the others are going to be -// removed. OffsetToEOL describes the cursor's position relative to the end of -// its current line. -// If `Cursor` is not on any #include, `Index` will be UINT_MAX. -static std::pair<unsigned, unsigned> -FindCursorIndex(const SmallVectorImpl<IncludeDirective> &Includes, - const SmallVectorImpl<unsigned> &Indices, unsigned Cursor) { - unsigned CursorIndex = UINT_MAX; - unsigned OffsetToEOL = 0; - for (int i = 0, e = Includes.size(); i != e; ++i) { - unsigned Start = Includes[Indices[i]].Offset; - unsigned End = Start + Includes[Indices[i]].Text.size(); - if (!(Cursor >= Start && Cursor < End)) - continue; - CursorIndex = Indices[i]; - OffsetToEOL = End - Cursor; - // Put the cursor on the only remaining #include among the duplicate - // #includes. - while (--i >= 0 && Includes[CursorIndex].Text == Includes[Indices[i]].Text) - CursorIndex = i; - break; - } - return std::make_pair(CursorIndex, OffsetToEOL); -} - -// Sorts and deduplicate a block of includes given by 'Includes' alphabetically -// adding the necessary replacement to 'Replaces'. 'Includes' must be in strict -// source order. -// #include directives with the same text will be deduplicated, and only the -// first #include in the duplicate #includes remains. If the `Cursor` is -// provided and put on a deleted #include, it will be moved to the remaining -// #include in the duplicate #includes. -static void sortCppIncludes(const FormatStyle &Style, - const SmallVectorImpl<IncludeDirective> &Includes, - ArrayRef<tooling::Range> Ranges, StringRef FileName, - tooling::Replacements &Replaces, unsigned *Cursor) { - unsigned IncludesBeginOffset = Includes.front().Offset; - unsigned IncludesEndOffset = - Includes.back().Offset + Includes.back().Text.size(); - unsigned IncludesBlockSize = IncludesEndOffset - IncludesBeginOffset; - if (!affectsRange(Ranges, IncludesBeginOffset, IncludesEndOffset)) - return; - SmallVector<unsigned, 16> Indices; - for (unsigned i = 0, e = Includes.size(); i != e; ++i) - Indices.push_back(i); - std::stable_sort( - Indices.begin(), Indices.end(), [&](unsigned LHSI, unsigned RHSI) { - return std::tie(Includes[LHSI].Category, Includes[LHSI].Filename) < - std::tie(Includes[RHSI].Category, Includes[RHSI].Filename); - }); - // The index of the include on which the cursor will be put after - // sorting/deduplicating. - unsigned CursorIndex; - // The offset from cursor to the end of line. - unsigned CursorToEOLOffset; - if (Cursor) - std::tie(CursorIndex, CursorToEOLOffset) = - FindCursorIndex(Includes, Indices, *Cursor); - - // Deduplicate #includes. - Indices.erase(std::unique(Indices.begin(), Indices.end(), - [&](unsigned LHSI, unsigned RHSI) { - return Includes[LHSI].Text == Includes[RHSI].Text; - }), - Indices.end()); - - int CurrentCategory = Includes.front().Category; - - // If the #includes are out of order, we generate a single replacement fixing - // the entire block. Otherwise, no replacement is generated. - if (Indices.size() == Includes.size() && - std::is_sorted(Indices.begin(), Indices.end()) && - Style.IncludeStyle.IncludeBlocks == tooling::IncludeStyle::IBS_Preserve) - return; - - std::string result; - for (unsigned Index : Indices) { - if (!result.empty()) { - result += "\n"; - if (Style.IncludeStyle.IncludeBlocks == - tooling::IncludeStyle::IBS_Regroup && - CurrentCategory != Includes[Index].Category) - result += "\n"; - } - result += Includes[Index].Text; - if (Cursor && CursorIndex == Index) - *Cursor = IncludesBeginOffset + result.size() - CursorToEOLOffset; - CurrentCategory = Includes[Index].Category; - } - - auto Err = Replaces.add(tooling::Replacement( - FileName, Includes.front().Offset, IncludesBlockSize, result)); - // FIXME: better error handling. For now, just skip the replacement for the - // release version. - if (Err) { - llvm::errs() << llvm::toString(std::move(Err)) << "\n"; - assert(false); - } -} - -namespace { - -const char CppIncludeRegexPattern[] = - R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))"; - -} // anonymous namespace - -tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code, - ArrayRef<tooling::Range> Ranges, - StringRef FileName, - tooling::Replacements &Replaces, - unsigned *Cursor) { - unsigned Prev = 0; - unsigned SearchFrom = 0; - llvm::Regex IncludeRegex(CppIncludeRegexPattern); - SmallVector<StringRef, 4> Matches; - SmallVector<IncludeDirective, 16> IncludesInBlock; - - // In compiled files, consider the first #include to be the main #include of - // the file if it is not a system #include. This ensures that the header - // doesn't have hidden dependencies - // (http://llvm.org/docs/CodingStandards.html#include-style). - // - // FIXME: Do some sanity checking, e.g. edit distance of the base name, to fix - // cases where the first #include is unlikely to be the main header. - tooling::IncludeCategoryManager Categories(Style.IncludeStyle, FileName); - bool FirstIncludeBlock = true; - bool MainIncludeFound = false; - bool FormattingOff = false; - - for (;;) { - auto Pos = Code.find('\n', SearchFrom); - StringRef Line = - Code.substr(Prev, (Pos != StringRef::npos ? Pos : Code.size()) - Prev); - - StringRef Trimmed = Line.trim(); - if (Trimmed == "// clang-format off") - FormattingOff = true; - else if (Trimmed == "// clang-format on") - FormattingOff = false; - - const bool EmptyLineSkipped = - Trimmed.empty() && - (Style.IncludeStyle.IncludeBlocks == tooling::IncludeStyle::IBS_Merge || - Style.IncludeStyle.IncludeBlocks == - tooling::IncludeStyle::IBS_Regroup); - - if (!FormattingOff && !Line.endswith("\\")) { - if (IncludeRegex.match(Line, &Matches)) { - StringRef IncludeName = Matches[2]; - int Category = Categories.getIncludePriority( - IncludeName, - /*CheckMainHeader=*/!MainIncludeFound && FirstIncludeBlock); - if (Category == 0) - MainIncludeFound = true; - IncludesInBlock.push_back({IncludeName, Line, Prev, Category}); - } else if (!IncludesInBlock.empty() && !EmptyLineSkipped) { - sortCppIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces, - Cursor); - IncludesInBlock.clear(); - FirstIncludeBlock = false; - } - Prev = Pos + 1; - } - if (Pos == StringRef::npos || Pos + 1 == Code.size()) - break; - SearchFrom = Pos + 1; - } - if (!IncludesInBlock.empty()) - sortCppIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces, Cursor); - return Replaces; -} - -// Returns group number to use as a first order sort on imports. Gives UINT_MAX -// if the import does not match any given groups. -static unsigned findJavaImportGroup(const FormatStyle &Style, - StringRef ImportIdentifier) { - unsigned LongestMatchIndex = UINT_MAX; - unsigned LongestMatchLength = 0; - for (unsigned I = 0; I < Style.JavaImportGroups.size(); I++) { - std::string GroupPrefix = Style.JavaImportGroups[I]; - if (ImportIdentifier.startswith(GroupPrefix) && - GroupPrefix.length() > LongestMatchLength) { - LongestMatchIndex = I; - LongestMatchLength = GroupPrefix.length(); - } - } - return LongestMatchIndex; -} - -// Sorts and deduplicates a block of includes given by 'Imports' based on -// JavaImportGroups, then adding the necessary replacement to 'Replaces'. -// Import declarations with the same text will be deduplicated. Between each -// import group, a newline is inserted, and within each import group, a -// lexicographic sort based on ASCII value is performed. -static void sortJavaImports(const FormatStyle &Style, - const SmallVectorImpl<JavaImportDirective> &Imports, - ArrayRef<tooling::Range> Ranges, StringRef FileName, - tooling::Replacements &Replaces) { - unsigned ImportsBeginOffset = Imports.front().Offset; - unsigned ImportsEndOffset = - Imports.back().Offset + Imports.back().Text.size(); - unsigned ImportsBlockSize = ImportsEndOffset - ImportsBeginOffset; - if (!affectsRange(Ranges, ImportsBeginOffset, ImportsEndOffset)) - return; - SmallVector<unsigned, 16> Indices; - SmallVector<unsigned, 16> JavaImportGroups; - for (unsigned i = 0, e = Imports.size(); i != e; ++i) { - Indices.push_back(i); - JavaImportGroups.push_back( - findJavaImportGroup(Style, Imports[i].Identifier)); - } - llvm::sort(Indices.begin(), Indices.end(), [&](unsigned LHSI, unsigned RHSI) { - // Negating IsStatic to push static imports above non-static imports. - return std::make_tuple(!Imports[LHSI].IsStatic, JavaImportGroups[LHSI], - Imports[LHSI].Identifier) < - std::make_tuple(!Imports[RHSI].IsStatic, JavaImportGroups[RHSI], - Imports[RHSI].Identifier); - }); - - // Deduplicate imports. - Indices.erase(std::unique(Indices.begin(), Indices.end(), - [&](unsigned LHSI, unsigned RHSI) { - return Imports[LHSI].Text == Imports[RHSI].Text; - }), - Indices.end()); - - bool CurrentIsStatic = Imports[Indices.front()].IsStatic; - unsigned CurrentImportGroup = JavaImportGroups[Indices.front()]; - - std::string result; - for (unsigned Index : Indices) { - if (!result.empty()) { - result += "\n"; - if (CurrentIsStatic != Imports[Index].IsStatic || - CurrentImportGroup != JavaImportGroups[Index]) - result += "\n"; - } - for (StringRef CommentLine : Imports[Index].AssociatedCommentLines) { - result += CommentLine; - result += "\n"; - } - result += Imports[Index].Text; - CurrentIsStatic = Imports[Index].IsStatic; - CurrentImportGroup = JavaImportGroups[Index]; - } - - auto Err = Replaces.add(tooling::Replacement(FileName, Imports.front().Offset, - ImportsBlockSize, result)); - // FIXME: better error handling. For now, just skip the replacement for the - // release version. - if (Err) { - llvm::errs() << llvm::toString(std::move(Err)) << "\n"; - assert(false); - } -} - -namespace { - -const char JavaImportRegexPattern[] = - "^[\t ]*import[\t ]*(static[\t ]*)?([^\t ]*)[\t ]*;"; - -} // anonymous namespace - -tooling::Replacements sortJavaImports(const FormatStyle &Style, StringRef Code, - ArrayRef<tooling::Range> Ranges, - StringRef FileName, - tooling::Replacements &Replaces) { - unsigned Prev = 0; - unsigned SearchFrom = 0; - llvm::Regex ImportRegex(JavaImportRegexPattern); - SmallVector<StringRef, 4> Matches; - SmallVector<JavaImportDirective, 16> ImportsInBlock; - std::vector<StringRef> AssociatedCommentLines; - - bool FormattingOff = false; - - for (;;) { - auto Pos = Code.find('\n', SearchFrom); - StringRef Line = - Code.substr(Prev, (Pos != StringRef::npos ? Pos : Code.size()) - Prev); - - StringRef Trimmed = Line.trim(); - if (Trimmed == "// clang-format off") - FormattingOff = true; - else if (Trimmed == "// clang-format on") - FormattingOff = false; - - if (ImportRegex.match(Line, &Matches)) { - if (FormattingOff) { - // If at least one import line has formatting turned off, turn off - // formatting entirely. - return Replaces; - } - StringRef Static = Matches[1]; - StringRef Identifier = Matches[2]; - bool IsStatic = false; - if (Static.contains("static")) { - IsStatic = true; - } - ImportsInBlock.push_back({Identifier, Line, Prev, AssociatedCommentLines, IsStatic}); - AssociatedCommentLines.clear(); - } else if (Trimmed.size() > 0 && !ImportsInBlock.empty()) { - // Associating comments within the imports with the nearest import below - AssociatedCommentLines.push_back(Line); - } - Prev = Pos + 1; - if (Pos == StringRef::npos || Pos + 1 == Code.size()) - break; - SearchFrom = Pos + 1; - } - if (!ImportsInBlock.empty()) - sortJavaImports(Style, ImportsInBlock, Ranges, FileName, Replaces); - return Replaces; -} - -bool isMpegTS(StringRef Code) { - // MPEG transport streams use the ".ts" file extension. clang-format should - // not attempt to format those. MPEG TS' frame format starts with 0x47 every - // 189 bytes - detect that and return. - return Code.size() > 188 && Code[0] == 0x47 && Code[188] == 0x47; -} - -bool isLikelyXml(StringRef Code) { return Code.ltrim().startswith("<"); } - -tooling::Replacements sortIncludes(const FormatStyle &Style, StringRef Code, - ArrayRef<tooling::Range> Ranges, - StringRef FileName, unsigned *Cursor) { - tooling::Replacements Replaces; - if (!Style.SortIncludes) - return Replaces; - if (isLikelyXml(Code)) - return Replaces; - if (Style.Language == FormatStyle::LanguageKind::LK_JavaScript && - isMpegTS(Code)) - return Replaces; - if (Style.Language == FormatStyle::LanguageKind::LK_JavaScript) - return sortJavaScriptImports(Style, Code, Ranges, FileName); - if (Style.Language == FormatStyle::LanguageKind::LK_Java) - return sortJavaImports(Style, Code, Ranges, FileName, Replaces); - sortCppIncludes(Style, Code, Ranges, FileName, Replaces, Cursor); - return Replaces; -} - -template <typename T> -static llvm::Expected<tooling::Replacements> -processReplacements(T ProcessFunc, StringRef Code, - const tooling::Replacements &Replaces, - const FormatStyle &Style) { - if (Replaces.empty()) - return tooling::Replacements(); - - auto NewCode = applyAllReplacements(Code, Replaces); - if (!NewCode) - return NewCode.takeError(); - std::vector<tooling::Range> ChangedRanges = Replaces.getAffectedRanges(); - StringRef FileName = Replaces.begin()->getFilePath(); - - tooling::Replacements FormatReplaces = - ProcessFunc(Style, *NewCode, ChangedRanges, FileName); - - return Replaces.merge(FormatReplaces); -} - -llvm::Expected<tooling::Replacements> -formatReplacements(StringRef Code, const tooling::Replacements &Replaces, - const FormatStyle &Style) { - // We need to use lambda function here since there are two versions of - // `sortIncludes`. - auto SortIncludes = [](const FormatStyle &Style, StringRef Code, - std::vector<tooling::Range> Ranges, - StringRef FileName) -> tooling::Replacements { - return sortIncludes(Style, Code, Ranges, FileName); - }; - auto SortedReplaces = - processReplacements(SortIncludes, Code, Replaces, Style); - if (!SortedReplaces) - return SortedReplaces.takeError(); - - // We need to use lambda function here since there are two versions of - // `reformat`. - auto Reformat = [](const FormatStyle &Style, StringRef Code, - std::vector<tooling::Range> Ranges, - StringRef FileName) -> tooling::Replacements { - return reformat(Style, Code, Ranges, FileName); - }; - return processReplacements(Reformat, Code, *SortedReplaces, Style); -} - -namespace { - -inline bool isHeaderInsertion(const tooling::Replacement &Replace) { - return Replace.getOffset() == UINT_MAX && Replace.getLength() == 0 && - llvm::Regex(CppIncludeRegexPattern) - .match(Replace.getReplacementText()); -} - -inline bool isHeaderDeletion(const tooling::Replacement &Replace) { - return Replace.getOffset() == UINT_MAX && Replace.getLength() == 1; -} - -// FIXME: insert empty lines between newly created blocks. -tooling::Replacements -fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces, - const FormatStyle &Style) { - if (!Style.isCpp()) - return Replaces; - - tooling::Replacements HeaderInsertions; - std::set<llvm::StringRef> HeadersToDelete; - tooling::Replacements Result; - for (const auto &R : Replaces) { - if (isHeaderInsertion(R)) { - // Replacements from \p Replaces must be conflict-free already, so we can - // simply consume the error. - llvm::consumeError(HeaderInsertions.add(R)); - } else if (isHeaderDeletion(R)) { - HeadersToDelete.insert(R.getReplacementText()); - } else if (R.getOffset() == UINT_MAX) { - llvm::errs() << "Insertions other than header #include insertion are " - "not supported! " - << R.getReplacementText() << "\n"; - } else { - llvm::consumeError(Result.add(R)); - } - } - if (HeaderInsertions.empty() && HeadersToDelete.empty()) - return Replaces; - - - StringRef FileName = Replaces.begin()->getFilePath(); - tooling::HeaderIncludes Includes(FileName, Code, Style.IncludeStyle); - - for (const auto &Header : HeadersToDelete) { - tooling::Replacements Replaces = - Includes.remove(Header.trim("\"<>"), Header.startswith("<")); - for (const auto &R : Replaces) { - auto Err = Result.add(R); - if (Err) { - // Ignore the deletion on conflict. - llvm::errs() << "Failed to add header deletion replacement for " - << Header << ": " << llvm::toString(std::move(Err)) - << "\n"; - } - } - } - - llvm::Regex IncludeRegex = llvm::Regex(CppIncludeRegexPattern); - llvm::SmallVector<StringRef, 4> Matches; - for (const auto &R : HeaderInsertions) { - auto IncludeDirective = R.getReplacementText(); - bool Matched = IncludeRegex.match(IncludeDirective, &Matches); - assert(Matched && "Header insertion replacement must have replacement text " - "'#include ...'"); - (void)Matched; - auto IncludeName = Matches[2]; - auto Replace = - Includes.insert(IncludeName.trim("\"<>"), IncludeName.startswith("<")); - if (Replace) { - auto Err = Result.add(*Replace); - if (Err) { - llvm::consumeError(std::move(Err)); - unsigned NewOffset = Result.getShiftedCodePosition(Replace->getOffset()); - auto Shifted = tooling::Replacement(FileName, NewOffset, 0, - Replace->getReplacementText()); - Result = Result.merge(tooling::Replacements(Shifted)); - } - } - } - return Result; -} - -} // anonymous namespace - -llvm::Expected<tooling::Replacements> -cleanupAroundReplacements(StringRef Code, const tooling::Replacements &Replaces, - const FormatStyle &Style) { - // We need to use lambda function here since there are two versions of - // `cleanup`. - auto Cleanup = [](const FormatStyle &Style, StringRef Code, - std::vector<tooling::Range> Ranges, - StringRef FileName) -> tooling::Replacements { - return cleanup(Style, Code, Ranges, FileName); - }; - // Make header insertion replacements insert new headers into correct blocks. - tooling::Replacements NewReplaces = - fixCppIncludeInsertions(Code, Replaces, Style); - return processReplacements(Cleanup, Code, NewReplaces, Style); -} - -namespace internal { -std::pair<tooling::Replacements, unsigned> -reformat(const FormatStyle &Style, StringRef Code, - ArrayRef<tooling::Range> Ranges, unsigned FirstStartColumn, - unsigned NextStartColumn, unsigned LastStartColumn, StringRef FileName, - FormattingAttemptStatus *Status) { - FormatStyle Expanded = expandPresets(Style); - if (Expanded.DisableFormat) - return {tooling::Replacements(), 0}; - if (isLikelyXml(Code)) - return {tooling::Replacements(), 0}; - if (Expanded.Language == FormatStyle::LK_JavaScript && isMpegTS(Code)) - return {tooling::Replacements(), 0}; - - typedef std::function<std::pair<tooling::Replacements, unsigned>( - const Environment &)> - AnalyzerPass; - SmallVector<AnalyzerPass, 4> Passes; - - if (Style.Language == FormatStyle::LK_Cpp) { - if (Style.FixNamespaceComments) - Passes.emplace_back([&](const Environment &Env) { - return NamespaceEndCommentsFixer(Env, Expanded).process(); - }); - - if (Style.SortUsingDeclarations) - Passes.emplace_back([&](const Environment &Env) { - return UsingDeclarationsSorter(Env, Expanded).process(); - }); - } - - if (Style.Language == FormatStyle::LK_JavaScript && - Style.JavaScriptQuotes != FormatStyle::JSQS_Leave) - Passes.emplace_back([&](const Environment &Env) { - return JavaScriptRequoter(Env, Expanded).process(); - }); - - Passes.emplace_back([&](const Environment &Env) { - return Formatter(Env, Expanded, Status).process(); - }); - - auto Env = - llvm::make_unique<Environment>(Code, FileName, Ranges, FirstStartColumn, - NextStartColumn, LastStartColumn); - llvm::Optional<std::string> CurrentCode = None; - tooling::Replacements Fixes; - unsigned Penalty = 0; - for (size_t I = 0, E = Passes.size(); I < E; ++I) { - std::pair<tooling::Replacements, unsigned> PassFixes = Passes[I](*Env); - auto NewCode = applyAllReplacements( - CurrentCode ? StringRef(*CurrentCode) : Code, PassFixes.first); - if (NewCode) { - Fixes = Fixes.merge(PassFixes.first); - Penalty += PassFixes.second; - if (I + 1 < E) { - CurrentCode = std::move(*NewCode); - Env = llvm::make_unique<Environment>( - *CurrentCode, FileName, - tooling::calculateRangesAfterReplacements(Fixes, Ranges), - FirstStartColumn, NextStartColumn, LastStartColumn); - } - } - } - - return {Fixes, Penalty}; -} -} // namespace internal - -tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, - ArrayRef<tooling::Range> Ranges, - StringRef FileName, - FormattingAttemptStatus *Status) { - return internal::reformat(Style, Code, Ranges, - /*FirstStartColumn=*/0, - /*NextStartColumn=*/0, - /*LastStartColumn=*/0, FileName, Status) - .first; -} - -tooling::Replacements cleanup(const FormatStyle &Style, StringRef Code, - ArrayRef<tooling::Range> Ranges, - StringRef FileName) { - // cleanups only apply to C++ (they mostly concern ctor commas etc.) - if (Style.Language != FormatStyle::LK_Cpp) - return tooling::Replacements(); - return Cleaner(Environment(Code, FileName, Ranges), Style).process().first; -} - -tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, - ArrayRef<tooling::Range> Ranges, - StringRef FileName, bool *IncompleteFormat) { - FormattingAttemptStatus Status; - auto Result = reformat(Style, Code, Ranges, FileName, &Status); - if (!Status.FormatComplete) - *IncompleteFormat = true; - return Result; -} - -tooling::Replacements fixNamespaceEndComments(const FormatStyle &Style, - StringRef Code, - ArrayRef<tooling::Range> Ranges, - StringRef FileName) { - return NamespaceEndCommentsFixer(Environment(Code, FileName, Ranges), Style) - .process() - .first; -} - -tooling::Replacements sortUsingDeclarations(const FormatStyle &Style, - StringRef Code, - ArrayRef<tooling::Range> Ranges, - StringRef FileName) { - return UsingDeclarationsSorter(Environment(Code, FileName, Ranges), Style) - .process() - .first; -} - -LangOptions getFormattingLangOpts(const FormatStyle &Style) { - LangOptions LangOpts; - LangOpts.CPlusPlus = 1; - LangOpts.CPlusPlus11 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; - LangOpts.CPlusPlus14 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; - LangOpts.CPlusPlus17 = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; - LangOpts.CPlusPlus2a = Style.Standard == FormatStyle::LS_Cpp03 ? 0 : 1; - LangOpts.LineComment = 1; - bool AlternativeOperators = Style.isCpp(); - LangOpts.CXXOperatorNames = AlternativeOperators ? 1 : 0; - LangOpts.Bool = 1; - LangOpts.ObjC = 1; - LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally. - LangOpts.DeclSpecKeyword = 1; // To get __declspec. - return LangOpts; -} - -const char *StyleOptionHelpDescription = - "Coding style, currently supports:\n" - " LLVM, Google, Chromium, Mozilla, WebKit.\n" - "Use -style=file to load style configuration from\n" - ".clang-format file located in one of the parent\n" - "directories of the source file (or current\n" - "directory for stdin).\n" - "Use -style=\"{key: value, ...}\" to set specific\n" - "parameters, e.g.:\n" - " -style=\"{BasedOnStyle: llvm, IndentWidth: 8}\""; - -static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) { - if (FileName.endswith(".java")) - return FormatStyle::LK_Java; - if (FileName.endswith_lower(".js") || FileName.endswith_lower(".ts")) - return FormatStyle::LK_JavaScript; // JavaScript or TypeScript. - if (FileName.endswith(".m") || FileName.endswith(".mm")) - return FormatStyle::LK_ObjC; - if (FileName.endswith_lower(".proto") || - FileName.endswith_lower(".protodevel")) - return FormatStyle::LK_Proto; - if (FileName.endswith_lower(".textpb") || - FileName.endswith_lower(".pb.txt") || - FileName.endswith_lower(".textproto") || - FileName.endswith_lower(".asciipb")) - return FormatStyle::LK_TextProto; - if (FileName.endswith_lower(".td")) - return FormatStyle::LK_TableGen; - return FormatStyle::LK_Cpp; -} - -FormatStyle::LanguageKind guessLanguage(StringRef FileName, StringRef Code) { - const auto GuessedLanguage = getLanguageByFileName(FileName); - if (GuessedLanguage == FormatStyle::LK_Cpp) { - auto Extension = llvm::sys::path::extension(FileName); - // If there's no file extension (or it's .h), we need to check the contents - // of the code to see if it contains Objective-C. - if (Extension.empty() || Extension == ".h") { - auto NonEmptyFileName = FileName.empty() ? "guess.h" : FileName; - Environment Env(Code, NonEmptyFileName, /*Ranges=*/{}); - ObjCHeaderStyleGuesser Guesser(Env, getLLVMStyle()); - Guesser.process(); - if (Guesser.isObjC()) - return FormatStyle::LK_ObjC; - } - } - return GuessedLanguage; -} - -const char *DefaultFormatStyle = "file"; - -const char *DefaultFallbackStyle = "LLVM"; - -llvm::Expected<FormatStyle> getStyle(StringRef StyleName, StringRef FileName, - StringRef FallbackStyleName, - StringRef Code, - llvm::vfs::FileSystem *FS) { - if (!FS) { - FS = llvm::vfs::getRealFileSystem().get(); - } - FormatStyle Style = getLLVMStyle(); - Style.Language = guessLanguage(FileName, Code); - - FormatStyle FallbackStyle = getNoStyle(); - if (!getPredefinedStyle(FallbackStyleName, Style.Language, &FallbackStyle)) - return make_string_error("Invalid fallback style \"" + FallbackStyleName); - - if (StyleName.startswith("{")) { - // Parse YAML/JSON style from the command line. - if (std::error_code ec = parseConfiguration(StyleName, &Style)) - return make_string_error("Error parsing -style: " + ec.message()); - return Style; - } - - if (!StyleName.equals_lower("file")) { - if (!getPredefinedStyle(StyleName, Style.Language, &Style)) - return make_string_error("Invalid value for -style"); - return Style; - } - - // Look for .clang-format/_clang-format file in the file's parent directories. - SmallString<128> UnsuitableConfigFiles; - SmallString<128> Path(FileName); - if (std::error_code EC = FS->makeAbsolute(Path)) - return make_string_error(EC.message()); - - for (StringRef Directory = Path; !Directory.empty(); - Directory = llvm::sys::path::parent_path(Directory)) { - - auto Status = FS->status(Directory); - if (!Status || - Status->getType() != llvm::sys::fs::file_type::directory_file) { - continue; - } - - SmallString<128> ConfigFile(Directory); - - llvm::sys::path::append(ConfigFile, ".clang-format"); - LLVM_DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n"); - - Status = FS->status(ConfigFile.str()); - bool FoundConfigFile = - Status && (Status->getType() == llvm::sys::fs::file_type::regular_file); - if (!FoundConfigFile) { - // Try _clang-format too, since dotfiles are not commonly used on Windows. - ConfigFile = Directory; - llvm::sys::path::append(ConfigFile, "_clang-format"); - LLVM_DEBUG(llvm::dbgs() << "Trying " << ConfigFile << "...\n"); - Status = FS->status(ConfigFile.str()); - FoundConfigFile = Status && (Status->getType() == - llvm::sys::fs::file_type::regular_file); - } - - if (FoundConfigFile) { - llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text = - FS->getBufferForFile(ConfigFile.str()); - if (std::error_code EC = Text.getError()) - return make_string_error(EC.message()); - if (std::error_code ec = - parseConfiguration(Text.get()->getBuffer(), &Style)) { - if (ec == ParseError::Unsuitable) { - if (!UnsuitableConfigFiles.empty()) - UnsuitableConfigFiles.append(", "); - UnsuitableConfigFiles.append(ConfigFile); - continue; - } - return make_string_error("Error reading " + ConfigFile + ": " + - ec.message()); - } - LLVM_DEBUG(llvm::dbgs() - << "Using configuration file " << ConfigFile << "\n"); - return Style; - } - } - if (!UnsuitableConfigFiles.empty()) - return make_string_error("Configuration file(s) do(es) not support " + - getLanguageName(Style.Language) + ": " + - UnsuitableConfigFiles); - return FallbackStyle; -} - -} // namespace format -} // namespace clang diff --git a/gnu/llvm/tools/clang/lib/Format/FormatInternal.h b/gnu/llvm/tools/clang/lib/Format/FormatInternal.h deleted file mode 100644 index 5c59e7656ee..00000000000 --- a/gnu/llvm/tools/clang/lib/Format/FormatInternal.h +++ /dev/null @@ -1,83 +0,0 @@ -//===--- FormatInternal.h - Format C++ code ---------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file declares Format APIs to be used internally by the -/// formatting library implementation. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LIB_FORMAT_FORMATINTERNAL_H -#define LLVM_CLANG_LIB_FORMAT_FORMATINTERNAL_H - -#include "BreakableToken.h" -#include "clang/Tooling/Core/Lookup.h" -#include <utility> - -namespace clang { -namespace format { -namespace internal { - -/// Reformats the given \p Ranges in the code fragment \p Code. -/// -/// A fragment of code could conceptually be surrounded by other code that might -/// constrain how that fragment is laid out. -/// For example, consider the fragment of code between 'R"(' and ')"', -/// exclusive, in the following code: -/// -/// void outer(int x) { -/// string inner = R"(name: data -/// ^ FirstStartColumn -/// value: { -/// x: 1 -/// ^ NextStartColumn -/// } -/// )"; -/// ^ LastStartColumn -/// } -/// -/// The outer code can influence the inner fragment as follows: -/// * \p FirstStartColumn specifies the column at which \p Code starts. -/// * \p NextStartColumn specifies the additional indent dictated by the -/// surrounding code. It is applied to the rest of the lines of \p Code. -/// * \p LastStartColumn specifies the column at which the last line of -/// \p Code should end, in case the last line is an empty line. -/// -/// In the case where the last line of the fragment contains content, -/// the fragment ends at the end of that content and \p LastStartColumn is -/// not taken into account, for example in: -/// -/// void block() { -/// string inner = R"(name: value)"; -/// } -/// -/// Each range is extended on either end to its next bigger logic unit, i.e. -/// everything that might influence its formatting or might be influenced by its -/// formatting. -/// -/// Returns a pair P, where: -/// * P.first are the ``Replacements`` necessary to make all \p Ranges comply -/// with \p Style. -/// * P.second is the penalty induced by formatting the fragment \p Code. -/// If the formatting of the fragment doesn't have a notion of penalty, -/// returns 0. -/// -/// If ``Status`` is non-null, its value will be populated with the status of -/// this formatting attempt. See \c FormattingAttemptStatus. -std::pair<tooling::Replacements, unsigned> -reformat(const FormatStyle &Style, StringRef Code, - ArrayRef<tooling::Range> Ranges, unsigned FirstStartColumn, - unsigned NextStartColumn, unsigned LastStartColumn, StringRef FileName, - FormattingAttemptStatus *Status); - -} // namespace internal -} // namespace format -} // namespace clang - -#endif diff --git a/gnu/llvm/tools/clang/lib/Format/FormatToken.cpp b/gnu/llvm/tools/clang/lib/Format/FormatToken.cpp deleted file mode 100644 index 62b08c576e0..00000000000 --- a/gnu/llvm/tools/clang/lib/Format/FormatToken.cpp +++ /dev/null @@ -1,307 +0,0 @@ -//===--- FormatToken.cpp - Format C++ code --------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file implements specific functions of \c FormatTokens and their -/// roles. -/// -//===----------------------------------------------------------------------===// - -#include "FormatToken.h" -#include "ContinuationIndenter.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Debug.h" -#include <climits> - -namespace clang { -namespace format { - -const char *getTokenTypeName(TokenType Type) { - static const char *const TokNames[] = { -#define TYPE(X) #X, - LIST_TOKEN_TYPES -#undef TYPE - nullptr}; - - if (Type < NUM_TOKEN_TYPES) - return TokNames[Type]; - llvm_unreachable("unknown TokenType"); - return nullptr; -} - -// FIXME: This is copy&pasted from Sema. Put it in a common place and remove -// duplication. -bool FormatToken::isSimpleTypeSpecifier() const { - switch (Tok.getKind()) { - case tok::kw_short: - case tok::kw_long: - case tok::kw___int64: - case tok::kw___int128: - case tok::kw_signed: - case tok::kw_unsigned: - case tok::kw_void: - case tok::kw_char: - case tok::kw_int: - case tok::kw_half: - case tok::kw_float: - case tok::kw_double: - case tok::kw__Float16: - case tok::kw___float128: - case tok::kw_wchar_t: - case tok::kw_bool: - case tok::kw___underlying_type: - case tok::annot_typename: - case tok::kw_char8_t: - case tok::kw_char16_t: - case tok::kw_char32_t: - case tok::kw_typeof: - case tok::kw_decltype: - return true; - default: - return false; - } -} - -TokenRole::~TokenRole() {} - -void TokenRole::precomputeFormattingInfos(const FormatToken *Token) {} - -unsigned CommaSeparatedList::formatAfterToken(LineState &State, - ContinuationIndenter *Indenter, - bool DryRun) { - if (State.NextToken == nullptr || !State.NextToken->Previous) - return 0; - - if (Formats.size() == 1) - return 0; // Handled by formatFromToken - - // Ensure that we start on the opening brace. - const FormatToken *LBrace = - State.NextToken->Previous->getPreviousNonComment(); - if (!LBrace || !LBrace->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) || - LBrace->BlockKind == BK_Block || LBrace->Type == TT_DictLiteral || - LBrace->Next->Type == TT_DesignatedInitializerPeriod) - return 0; - - // Calculate the number of code points we have to format this list. As the - // first token is already placed, we have to subtract it. - unsigned RemainingCodePoints = - Style.ColumnLimit - State.Column + State.NextToken->Previous->ColumnWidth; - - // Find the best ColumnFormat, i.e. the best number of columns to use. - const ColumnFormat *Format = getColumnFormat(RemainingCodePoints); - - // If no ColumnFormat can be used, the braced list would generally be - // bin-packed. Add a severe penalty to this so that column layouts are - // preferred if possible. - if (!Format) - return 10000; - - // Format the entire list. - unsigned Penalty = 0; - unsigned Column = 0; - unsigned Item = 0; - while (State.NextToken != LBrace->MatchingParen) { - bool NewLine = false; - unsigned ExtraSpaces = 0; - - // If the previous token was one of our commas, we are now on the next item. - if (Item < Commas.size() && State.NextToken->Previous == Commas[Item]) { - if (!State.NextToken->isTrailingComment()) { - ExtraSpaces += Format->ColumnSizes[Column] - ItemLengths[Item]; - ++Column; - } - ++Item; - } - - if (Column == Format->Columns || State.NextToken->MustBreakBefore) { - Column = 0; - NewLine = true; - } - - // Place token using the continuation indenter and store the penalty. - Penalty += Indenter->addTokenToState(State, NewLine, DryRun, ExtraSpaces); - } - return Penalty; -} - -unsigned CommaSeparatedList::formatFromToken(LineState &State, - ContinuationIndenter *Indenter, - bool DryRun) { - // Formatting with 1 Column isn't really a column layout, so we don't need the - // special logic here. We can just avoid bin packing any of the parameters. - if (Formats.size() == 1 || HasNestedBracedList) - State.Stack.back().AvoidBinPacking = true; - return 0; -} - -// Returns the lengths in code points between Begin and End (both included), -// assuming that the entire sequence is put on a single line. -static unsigned CodePointsBetween(const FormatToken *Begin, - const FormatToken *End) { - assert(End->TotalLength >= Begin->TotalLength); - return End->TotalLength - Begin->TotalLength + Begin->ColumnWidth; -} - -void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { - // FIXME: At some point we might want to do this for other lists, too. - if (!Token->MatchingParen || - !Token->isOneOf(tok::l_brace, TT_ArrayInitializerLSquare)) - return; - - // In C++11 braced list style, we should not format in columns unless they - // have many items (20 or more) or we allow bin-packing of function call - // arguments. - if (Style.Cpp11BracedListStyle && !Style.BinPackArguments && - Commas.size() < 19) - return; - - // Limit column layout for JavaScript array initializers to 20 or more items - // for now to introduce it carefully. We can become more aggressive if this - // necessary. - if (Token->is(TT_ArrayInitializerLSquare) && Commas.size() < 19) - return; - - // Column format doesn't really make sense if we don't align after brackets. - if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign) - return; - - FormatToken *ItemBegin = Token->Next; - while (ItemBegin->isTrailingComment()) - ItemBegin = ItemBegin->Next; - SmallVector<bool, 8> MustBreakBeforeItem; - - // The lengths of an item if it is put at the end of the line. This includes - // trailing comments which are otherwise ignored for column alignment. - SmallVector<unsigned, 8> EndOfLineItemLength; - - bool HasSeparatingComment = false; - for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) { - // Skip comments on their own line. - while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) { - ItemBegin = ItemBegin->Next; - HasSeparatingComment = i > 0; - } - - MustBreakBeforeItem.push_back(ItemBegin->MustBreakBefore); - if (ItemBegin->is(tok::l_brace)) - HasNestedBracedList = true; - const FormatToken *ItemEnd = nullptr; - if (i == Commas.size()) { - ItemEnd = Token->MatchingParen; - const FormatToken *NonCommentEnd = ItemEnd->getPreviousNonComment(); - ItemLengths.push_back(CodePointsBetween(ItemBegin, NonCommentEnd)); - if (Style.Cpp11BracedListStyle && - !ItemEnd->Previous->isTrailingComment()) { - // In Cpp11 braced list style, the } and possibly other subsequent - // tokens will need to stay on a line with the last element. - while (ItemEnd->Next && !ItemEnd->Next->CanBreakBefore) - ItemEnd = ItemEnd->Next; - } else { - // In other braced lists styles, the "}" can be wrapped to the new line. - ItemEnd = Token->MatchingParen->Previous; - } - } else { - ItemEnd = Commas[i]; - // The comma is counted as part of the item when calculating the length. - ItemLengths.push_back(CodePointsBetween(ItemBegin, ItemEnd)); - - // Consume trailing comments so the are included in EndOfLineItemLength. - if (ItemEnd->Next && !ItemEnd->Next->HasUnescapedNewline && - ItemEnd->Next->isTrailingComment()) - ItemEnd = ItemEnd->Next; - } - EndOfLineItemLength.push_back(CodePointsBetween(ItemBegin, ItemEnd)); - // If there is a trailing comma in the list, the next item will start at the - // closing brace. Don't create an extra item for this. - if (ItemEnd->getNextNonComment() == Token->MatchingParen) - break; - ItemBegin = ItemEnd->Next; - } - - // Don't use column layout for lists with few elements and in presence of - // separating comments. - if (Commas.size() < 5 || HasSeparatingComment) - return; - - if (Token->NestingLevel != 0 && Token->is(tok::l_brace) && Commas.size() < 19) - return; - - // We can never place more than ColumnLimit / 3 items in a row (because of the - // spaces and the comma). - unsigned MaxItems = Style.ColumnLimit / 3; - std::vector<unsigned> MinSizeInColumn; - MinSizeInColumn.reserve(MaxItems); - for (unsigned Columns = 1; Columns <= MaxItems; ++Columns) { - ColumnFormat Format; - Format.Columns = Columns; - Format.ColumnSizes.resize(Columns); - MinSizeInColumn.assign(Columns, UINT_MAX); - Format.LineCount = 1; - bool HasRowWithSufficientColumns = false; - unsigned Column = 0; - for (unsigned i = 0, e = ItemLengths.size(); i != e; ++i) { - assert(i < MustBreakBeforeItem.size()); - if (MustBreakBeforeItem[i] || Column == Columns) { - ++Format.LineCount; - Column = 0; - } - if (Column == Columns - 1) - HasRowWithSufficientColumns = true; - unsigned Length = - (Column == Columns - 1) ? EndOfLineItemLength[i] : ItemLengths[i]; - Format.ColumnSizes[Column] = std::max(Format.ColumnSizes[Column], Length); - MinSizeInColumn[Column] = std::min(MinSizeInColumn[Column], Length); - ++Column; - } - // If all rows are terminated early (e.g. by trailing comments), we don't - // need to look further. - if (!HasRowWithSufficientColumns) - break; - Format.TotalWidth = Columns - 1; // Width of the N-1 spaces. - - for (unsigned i = 0; i < Columns; ++i) - Format.TotalWidth += Format.ColumnSizes[i]; - - // Don't use this Format, if the difference between the longest and shortest - // element in a column exceeds a threshold to avoid excessive spaces. - if ([&] { - for (unsigned i = 0; i < Columns - 1; ++i) - if (Format.ColumnSizes[i] - MinSizeInColumn[i] > 10) - return true; - return false; - }()) - continue; - - // Ignore layouts that are bound to violate the column limit. - if (Format.TotalWidth > Style.ColumnLimit && Columns > 1) - continue; - - Formats.push_back(Format); - } -} - -const CommaSeparatedList::ColumnFormat * -CommaSeparatedList::getColumnFormat(unsigned RemainingCharacters) const { - const ColumnFormat *BestFormat = nullptr; - for (SmallVector<ColumnFormat, 4>::const_reverse_iterator - I = Formats.rbegin(), - E = Formats.rend(); - I != E; ++I) { - if (I->TotalWidth <= RemainingCharacters || I->Columns == 1) { - if (BestFormat && I->LineCount > BestFormat->LineCount) - break; - BestFormat = &*I; - } - } - return BestFormat; -} - -} // namespace format -} // namespace clang diff --git a/gnu/llvm/tools/clang/lib/Format/FormatToken.h b/gnu/llvm/tools/clang/lib/Format/FormatToken.h deleted file mode 100644 index 10390c42911..00000000000 --- a/gnu/llvm/tools/clang/lib/Format/FormatToken.h +++ /dev/null @@ -1,816 +0,0 @@ -//===--- FormatToken.h - Format C++ code ------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file contains the declaration of the FormatToken, a wrapper -/// around Token with additional information related to formatting. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H -#define LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H - -#include "clang/Basic/IdentifierTable.h" -#include "clang/Basic/OperatorPrecedence.h" -#include "clang/Format/Format.h" -#include "clang/Lex/Lexer.h" -#include <memory> -#include <unordered_set> - -namespace clang { -namespace format { - -#define LIST_TOKEN_TYPES \ - TYPE(ArrayInitializerLSquare) \ - TYPE(ArraySubscriptLSquare) \ - TYPE(AttributeColon) \ - TYPE(AttributeParen) \ - TYPE(AttributeSquare) \ - TYPE(BinaryOperator) \ - TYPE(BitFieldColon) \ - TYPE(BlockComment) \ - TYPE(CastRParen) \ - TYPE(ConditionalExpr) \ - TYPE(ConflictAlternative) \ - TYPE(ConflictEnd) \ - TYPE(ConflictStart) \ - TYPE(CtorInitializerColon) \ - TYPE(CtorInitializerComma) \ - TYPE(DesignatedInitializerLSquare) \ - TYPE(DesignatedInitializerPeriod) \ - TYPE(DictLiteral) \ - TYPE(ForEachMacro) \ - TYPE(FunctionAnnotationRParen) \ - TYPE(FunctionDeclarationName) \ - TYPE(FunctionLBrace) \ - TYPE(FunctionTypeLParen) \ - TYPE(ImplicitStringLiteral) \ - TYPE(InheritanceColon) \ - TYPE(InheritanceComma) \ - TYPE(InlineASMBrace) \ - TYPE(InlineASMColon) \ - TYPE(JavaAnnotation) \ - TYPE(JsComputedPropertyName) \ - TYPE(JsExponentiation) \ - TYPE(JsExponentiationEqual) \ - TYPE(JsFatArrow) \ - TYPE(JsNonNullAssertion) \ - TYPE(JsTypeColon) \ - TYPE(JsTypeOperator) \ - TYPE(JsTypeOptionalQuestion) \ - TYPE(LambdaArrow) \ - TYPE(LambdaLSquare) \ - TYPE(LeadingJavaAnnotation) \ - TYPE(LineComment) \ - TYPE(MacroBlockBegin) \ - TYPE(MacroBlockEnd) \ - TYPE(ObjCBlockLBrace) \ - TYPE(ObjCBlockLParen) \ - TYPE(ObjCDecl) \ - TYPE(ObjCForIn) \ - TYPE(ObjCMethodExpr) \ - TYPE(ObjCMethodSpecifier) \ - TYPE(ObjCProperty) \ - TYPE(ObjCStringLiteral) \ - TYPE(OverloadedOperator) \ - TYPE(OverloadedOperatorLParen) \ - TYPE(PointerOrReference) \ - TYPE(PureVirtualSpecifier) \ - TYPE(RangeBasedForLoopColon) \ - TYPE(RegexLiteral) \ - TYPE(SelectorName) \ - TYPE(StartOfName) \ - TYPE(StatementMacro) \ - TYPE(StructuredBindingLSquare) \ - TYPE(TemplateCloser) \ - TYPE(TemplateOpener) \ - TYPE(TemplateString) \ - TYPE(ProtoExtensionLSquare) \ - TYPE(TrailingAnnotation) \ - TYPE(TrailingReturnArrow) \ - TYPE(TrailingUnaryOperator) \ - TYPE(UnaryOperator) \ - TYPE(Unknown) - -enum TokenType { -#define TYPE(X) TT_##X, - LIST_TOKEN_TYPES -#undef TYPE - NUM_TOKEN_TYPES -}; - -/// Determines the name of a token type. -const char *getTokenTypeName(TokenType Type); - -// Represents what type of block a set of braces open. -enum BraceBlockKind { BK_Unknown, BK_Block, BK_BracedInit }; - -// The packing kind of a function's parameters. -enum ParameterPackingKind { PPK_BinPacked, PPK_OnePerLine, PPK_Inconclusive }; - -enum FormatDecision { FD_Unformatted, FD_Continue, FD_Break }; - -class TokenRole; -class AnnotatedLine; - -/// A wrapper around a \c Token storing information about the -/// whitespace characters preceding it. -struct FormatToken { - FormatToken() {} - - /// The \c Token. - Token Tok; - - /// The number of newlines immediately before the \c Token. - /// - /// This can be used to determine what the user wrote in the original code - /// and thereby e.g. leave an empty line between two function definitions. - unsigned NewlinesBefore = 0; - - /// Whether there is at least one unescaped newline before the \c - /// Token. - bool HasUnescapedNewline = false; - - /// The range of the whitespace immediately preceding the \c Token. - SourceRange WhitespaceRange; - - /// The offset just past the last '\n' in this token's leading - /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'. - unsigned LastNewlineOffset = 0; - - /// The width of the non-whitespace parts of the token (or its first - /// line for multi-line tokens) in columns. - /// We need this to correctly measure number of columns a token spans. - unsigned ColumnWidth = 0; - - /// Contains the width in columns of the last line of a multi-line - /// token. - unsigned LastLineColumnWidth = 0; - - /// Whether the token text contains newlines (escaped or not). - bool IsMultiline = false; - - /// Indicates that this is the first token of the file. - bool IsFirst = false; - - /// Whether there must be a line break before this token. - /// - /// This happens for example when a preprocessor directive ended directly - /// before the token. - bool MustBreakBefore = false; - - /// The raw text of the token. - /// - /// Contains the raw token text without leading whitespace and without leading - /// escaped newlines. - StringRef TokenText; - - /// Set to \c true if this token is an unterminated literal. - bool IsUnterminatedLiteral = 0; - - /// Contains the kind of block if this token is a brace. - BraceBlockKind BlockKind = BK_Unknown; - - TokenType Type = TT_Unknown; - - /// The number of spaces that should be inserted before this token. - unsigned SpacesRequiredBefore = 0; - - /// \c true if it is allowed to break before this token. - bool CanBreakBefore = false; - - /// \c true if this is the ">" of "template<..>". - bool ClosesTemplateDeclaration = false; - - /// Number of parameters, if this is "(", "[" or "<". - unsigned ParameterCount = 0; - - /// Number of parameters that are nested blocks, - /// if this is "(", "[" or "<". - unsigned BlockParameterCount = 0; - - /// If this is a bracket ("<", "(", "[" or "{"), contains the kind of - /// the surrounding bracket. - tok::TokenKind ParentBracket = tok::unknown; - - /// A token can have a special role that can carry extra information - /// about the token's formatting. - std::unique_ptr<TokenRole> Role; - - /// If this is an opening parenthesis, how are the parameters packed? - ParameterPackingKind PackingKind = PPK_Inconclusive; - - /// The total length of the unwrapped line up to and including this - /// token. - unsigned TotalLength = 0; - - /// The original 0-based column of this token, including expanded tabs. - /// The configured TabWidth is used as tab width. - unsigned OriginalColumn = 0; - - /// The length of following tokens until the next natural split point, - /// or the next token that can be broken. - unsigned UnbreakableTailLength = 0; - - // FIXME: Come up with a 'cleaner' concept. - /// The binding strength of a token. This is a combined value of - /// operator precedence, parenthesis nesting, etc. - unsigned BindingStrength = 0; - - /// The nesting level of this token, i.e. the number of surrounding (), - /// [], {} or <>. - unsigned NestingLevel = 0; - - /// The indent level of this token. Copied from the surrounding line. - unsigned IndentLevel = 0; - - /// Penalty for inserting a line break before this token. - unsigned SplitPenalty = 0; - - /// If this is the first ObjC selector name in an ObjC method - /// definition or call, this contains the length of the longest name. - /// - /// This being set to 0 means that the selectors should not be colon-aligned, - /// e.g. because several of them are block-type. - unsigned LongestObjCSelectorName = 0; - - /// If this is the first ObjC selector name in an ObjC method - /// definition or call, this contains the number of parts that the whole - /// selector consist of. - unsigned ObjCSelectorNameParts = 0; - - /// The 0-based index of the parameter/argument. For ObjC it is set - /// for the selector name token. - /// For now calculated only for ObjC. - unsigned ParameterIndex = 0; - - /// Stores the number of required fake parentheses and the - /// corresponding operator precedence. - /// - /// If multiple fake parentheses start at a token, this vector stores them in - /// reverse order, i.e. inner fake parenthesis first. - SmallVector<prec::Level, 4> FakeLParens; - /// Insert this many fake ) after this token for correct indentation. - unsigned FakeRParens = 0; - - /// \c true if this token starts a binary expression, i.e. has at least - /// one fake l_paren with a precedence greater than prec::Unknown. - bool StartsBinaryExpression = false; - /// \c true if this token ends a binary expression. - bool EndsBinaryExpression = false; - - /// If this is an operator (or "."/"->") in a sequence of operators - /// with the same precedence, contains the 0-based operator index. - unsigned OperatorIndex = 0; - - /// If this is an operator (or "."/"->") in a sequence of operators - /// with the same precedence, points to the next operator. - FormatToken *NextOperator = nullptr; - - /// Is this token part of a \c DeclStmt defining multiple variables? - /// - /// Only set if \c Type == \c TT_StartOfName. - bool PartOfMultiVariableDeclStmt = false; - - /// Does this line comment continue a line comment section? - /// - /// Only set to true if \c Type == \c TT_LineComment. - bool ContinuesLineCommentSection = false; - - /// If this is a bracket, this points to the matching one. - FormatToken *MatchingParen = nullptr; - - /// The previous token in the unwrapped line. - FormatToken *Previous = nullptr; - - /// The next token in the unwrapped line. - FormatToken *Next = nullptr; - - /// If this token starts a block, this contains all the unwrapped lines - /// in it. - SmallVector<AnnotatedLine *, 1> Children; - - /// Stores the formatting decision for the token once it was made. - FormatDecision Decision = FD_Unformatted; - - /// If \c true, this token has been fully formatted (indented and - /// potentially re-formatted inside), and we do not allow further formatting - /// changes. - bool Finalized = false; - - bool is(tok::TokenKind Kind) const { return Tok.is(Kind); } - bool is(TokenType TT) const { return Type == TT; } - bool is(const IdentifierInfo *II) const { - return II && II == Tok.getIdentifierInfo(); - } - bool is(tok::PPKeywordKind Kind) const { - return Tok.getIdentifierInfo() && - Tok.getIdentifierInfo()->getPPKeywordID() == Kind; - } - template <typename A, typename B> bool isOneOf(A K1, B K2) const { - return is(K1) || is(K2); - } - template <typename A, typename B, typename... Ts> - bool isOneOf(A K1, B K2, Ts... Ks) const { - return is(K1) || isOneOf(K2, Ks...); - } - template <typename T> bool isNot(T Kind) const { return !is(Kind); } - - bool closesScopeAfterBlock() const { - if (BlockKind == BK_Block) - return true; - if (closesScope()) - return Previous->closesScopeAfterBlock(); - return false; - } - - /// \c true if this token starts a sequence with the given tokens in order, - /// following the ``Next`` pointers, ignoring comments. - template <typename A, typename... Ts> - bool startsSequence(A K1, Ts... Tokens) const { - return startsSequenceInternal(K1, Tokens...); - } - - /// \c true if this token ends a sequence with the given tokens in order, - /// following the ``Previous`` pointers, ignoring comments. - template <typename A, typename... Ts> - bool endsSequence(A K1, Ts... Tokens) const { - return endsSequenceInternal(K1, Tokens...); - } - - bool isStringLiteral() const { return tok::isStringLiteral(Tok.getKind()); } - - bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const { - return Tok.isObjCAtKeyword(Kind); - } - - bool isAccessSpecifier(bool ColonRequired = true) const { - return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) && - (!ColonRequired || (Next && Next->is(tok::colon))); - } - - /// Determine whether the token is a simple-type-specifier. - bool isSimpleTypeSpecifier() const; - - bool isObjCAccessSpecifier() const { - return is(tok::at) && Next && - (Next->isObjCAtKeyword(tok::objc_public) || - Next->isObjCAtKeyword(tok::objc_protected) || - Next->isObjCAtKeyword(tok::objc_package) || - Next->isObjCAtKeyword(tok::objc_private)); - } - - /// Returns whether \p Tok is ([{ or an opening < of a template or in - /// protos. - bool opensScope() const { - if (is(TT_TemplateString) && TokenText.endswith("${")) - return true; - if (is(TT_DictLiteral) && is(tok::less)) - return true; - return isOneOf(tok::l_paren, tok::l_brace, tok::l_square, - TT_TemplateOpener); - } - /// Returns whether \p Tok is )]} or a closing > of a template or in - /// protos. - bool closesScope() const { - if (is(TT_TemplateString) && TokenText.startswith("}")) - return true; - if (is(TT_DictLiteral) && is(tok::greater)) - return true; - return isOneOf(tok::r_paren, tok::r_brace, tok::r_square, - TT_TemplateCloser); - } - - /// Returns \c true if this is a "." or "->" accessing a member. - bool isMemberAccess() const { - return isOneOf(tok::arrow, tok::period, tok::arrowstar) && - !isOneOf(TT_DesignatedInitializerPeriod, TT_TrailingReturnArrow, - TT_LambdaArrow); - } - - bool isUnaryOperator() const { - switch (Tok.getKind()) { - case tok::plus: - case tok::plusplus: - case tok::minus: - case tok::minusminus: - case tok::exclaim: - case tok::tilde: - case tok::kw_sizeof: - case tok::kw_alignof: - return true; - default: - return false; - } - } - - bool isBinaryOperator() const { - // Comma is a binary operator, but does not behave as such wrt. formatting. - return getPrecedence() > prec::Comma; - } - - bool isTrailingComment() const { - return is(tok::comment) && - (is(TT_LineComment) || !Next || Next->NewlinesBefore > 0); - } - - /// Returns \c true if this is a keyword that can be used - /// like a function call (e.g. sizeof, typeid, ...). - bool isFunctionLikeKeyword() const { - switch (Tok.getKind()) { - case tok::kw_throw: - case tok::kw_typeid: - case tok::kw_return: - case tok::kw_sizeof: - case tok::kw_alignof: - case tok::kw_alignas: - case tok::kw_decltype: - case tok::kw_noexcept: - case tok::kw_static_assert: - case tok::kw___attribute: - return true; - default: - return false; - } - } - - /// Returns \c true if this is a string literal that's like a label, - /// e.g. ends with "=" or ":". - bool isLabelString() const { - if (!is(tok::string_literal)) - return false; - StringRef Content = TokenText; - if (Content.startswith("\"") || Content.startswith("'")) - Content = Content.drop_front(1); - if (Content.endswith("\"") || Content.endswith("'")) - Content = Content.drop_back(1); - Content = Content.trim(); - return Content.size() > 1 && - (Content.back() == ':' || Content.back() == '='); - } - - /// Returns actual token start location without leading escaped - /// newlines and whitespace. - /// - /// This can be different to Tok.getLocation(), which includes leading escaped - /// newlines. - SourceLocation getStartOfNonWhitespace() const { - return WhitespaceRange.getEnd(); - } - - prec::Level getPrecedence() const { - return getBinOpPrecedence(Tok.getKind(), /*GreaterThanIsOperator=*/true, - /*CPlusPlus11=*/true); - } - - /// Returns the previous token ignoring comments. - FormatToken *getPreviousNonComment() const { - FormatToken *Tok = Previous; - while (Tok && Tok->is(tok::comment)) - Tok = Tok->Previous; - return Tok; - } - - /// Returns the next token ignoring comments. - const FormatToken *getNextNonComment() const { - const FormatToken *Tok = Next; - while (Tok && Tok->is(tok::comment)) - Tok = Tok->Next; - return Tok; - } - - /// Returns \c true if this tokens starts a block-type list, i.e. a - /// list that should be indented with a block indent. - bool opensBlockOrBlockTypeList(const FormatStyle &Style) const { - if (is(TT_TemplateString) && opensScope()) - return true; - return is(TT_ArrayInitializerLSquare) || - is(TT_ProtoExtensionLSquare) || - (is(tok::l_brace) && - (BlockKind == BK_Block || is(TT_DictLiteral) || - (!Style.Cpp11BracedListStyle && NestingLevel == 0))) || - (is(tok::less) && (Style.Language == FormatStyle::LK_Proto || - Style.Language == FormatStyle::LK_TextProto)); - } - - /// Returns whether the token is the left square bracket of a C++ - /// structured binding declaration. - bool isCppStructuredBinding(const FormatStyle &Style) const { - if (!Style.isCpp() || isNot(tok::l_square)) - return false; - const FormatToken *T = this; - do { - T = T->getPreviousNonComment(); - } while (T && T->isOneOf(tok::kw_const, tok::kw_volatile, tok::amp, - tok::ampamp)); - return T && T->is(tok::kw_auto); - } - - /// Same as opensBlockOrBlockTypeList, but for the closing token. - bool closesBlockOrBlockTypeList(const FormatStyle &Style) const { - if (is(TT_TemplateString) && closesScope()) - return true; - return MatchingParen && MatchingParen->opensBlockOrBlockTypeList(Style); - } - - /// Return the actual namespace token, if this token starts a namespace - /// block. - const FormatToken *getNamespaceToken() const { - const FormatToken *NamespaceTok = this; - if (is(tok::comment)) - NamespaceTok = NamespaceTok->getNextNonComment(); - // Detect "(inline|export)? namespace" in the beginning of a line. - if (NamespaceTok && NamespaceTok->isOneOf(tok::kw_inline, tok::kw_export)) - NamespaceTok = NamespaceTok->getNextNonComment(); - return NamespaceTok && NamespaceTok->is(tok::kw_namespace) ? NamespaceTok - : nullptr; - } - -private: - // Disallow copying. - FormatToken(const FormatToken &) = delete; - void operator=(const FormatToken &) = delete; - - template <typename A, typename... Ts> - bool startsSequenceInternal(A K1, Ts... Tokens) const { - if (is(tok::comment) && Next) - return Next->startsSequenceInternal(K1, Tokens...); - return is(K1) && Next && Next->startsSequenceInternal(Tokens...); - } - - template <typename A> bool startsSequenceInternal(A K1) const { - if (is(tok::comment) && Next) - return Next->startsSequenceInternal(K1); - return is(K1); - } - - template <typename A, typename... Ts> bool endsSequenceInternal(A K1) const { - if (is(tok::comment) && Previous) - return Previous->endsSequenceInternal(K1); - return is(K1); - } - - template <typename A, typename... Ts> - bool endsSequenceInternal(A K1, Ts... Tokens) const { - if (is(tok::comment) && Previous) - return Previous->endsSequenceInternal(K1, Tokens...); - return is(K1) && Previous && Previous->endsSequenceInternal(Tokens...); - } -}; - -class ContinuationIndenter; -struct LineState; - -class TokenRole { -public: - TokenRole(const FormatStyle &Style) : Style(Style) {} - virtual ~TokenRole(); - - /// After the \c TokenAnnotator has finished annotating all the tokens, - /// this function precomputes required information for formatting. - virtual void precomputeFormattingInfos(const FormatToken *Token); - - /// Apply the special formatting that the given role demands. - /// - /// Assumes that the token having this role is already formatted. - /// - /// Continues formatting from \p State leaving indentation to \p Indenter and - /// returns the total penalty that this formatting incurs. - virtual unsigned formatFromToken(LineState &State, - ContinuationIndenter *Indenter, - bool DryRun) { - return 0; - } - - /// Same as \c formatFromToken, but assumes that the first token has - /// already been set thereby deciding on the first line break. - virtual unsigned formatAfterToken(LineState &State, - ContinuationIndenter *Indenter, - bool DryRun) { - return 0; - } - - /// Notifies the \c Role that a comma was found. - virtual void CommaFound(const FormatToken *Token) {} - - virtual const FormatToken *lastComma() { return nullptr; } - -protected: - const FormatStyle &Style; -}; - -class CommaSeparatedList : public TokenRole { -public: - CommaSeparatedList(const FormatStyle &Style) - : TokenRole(Style), HasNestedBracedList(false) {} - - void precomputeFormattingInfos(const FormatToken *Token) override; - - unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter, - bool DryRun) override; - - unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter, - bool DryRun) override; - - /// Adds \p Token as the next comma to the \c CommaSeparated list. - void CommaFound(const FormatToken *Token) override { - Commas.push_back(Token); - } - - const FormatToken *lastComma() override { - if (Commas.empty()) - return nullptr; - return Commas.back(); - } - -private: - /// A struct that holds information on how to format a given list with - /// a specific number of columns. - struct ColumnFormat { - /// The number of columns to use. - unsigned Columns; - - /// The total width in characters. - unsigned TotalWidth; - - /// The number of lines required for this format. - unsigned LineCount; - - /// The size of each column in characters. - SmallVector<unsigned, 8> ColumnSizes; - }; - - /// Calculate which \c ColumnFormat fits best into - /// \p RemainingCharacters. - const ColumnFormat *getColumnFormat(unsigned RemainingCharacters) const; - - /// The ordered \c FormatTokens making up the commas of this list. - SmallVector<const FormatToken *, 8> Commas; - - /// The length of each of the list's items in characters including the - /// trailing comma. - SmallVector<unsigned, 8> ItemLengths; - - /// Precomputed formats that can be used for this list. - SmallVector<ColumnFormat, 4> Formats; - - bool HasNestedBracedList; -}; - -/// Encapsulates keywords that are context sensitive or for languages not -/// properly supported by Clang's lexer. -struct AdditionalKeywords { - AdditionalKeywords(IdentifierTable &IdentTable) { - kw_final = &IdentTable.get("final"); - kw_override = &IdentTable.get("override"); - kw_in = &IdentTable.get("in"); - kw_of = &IdentTable.get("of"); - kw_CF_ENUM = &IdentTable.get("CF_ENUM"); - kw_CF_OPTIONS = &IdentTable.get("CF_OPTIONS"); - kw_NS_ENUM = &IdentTable.get("NS_ENUM"); - kw_NS_OPTIONS = &IdentTable.get("NS_OPTIONS"); - - kw_as = &IdentTable.get("as"); - kw_async = &IdentTable.get("async"); - kw_await = &IdentTable.get("await"); - kw_declare = &IdentTable.get("declare"); - kw_finally = &IdentTable.get("finally"); - kw_from = &IdentTable.get("from"); - kw_function = &IdentTable.get("function"); - kw_get = &IdentTable.get("get"); - kw_import = &IdentTable.get("import"); - kw_infer = &IdentTable.get("infer"); - kw_is = &IdentTable.get("is"); - kw_let = &IdentTable.get("let"); - kw_module = &IdentTable.get("module"); - kw_readonly = &IdentTable.get("readonly"); - kw_set = &IdentTable.get("set"); - kw_type = &IdentTable.get("type"); - kw_typeof = &IdentTable.get("typeof"); - kw_var = &IdentTable.get("var"); - kw_yield = &IdentTable.get("yield"); - - kw_abstract = &IdentTable.get("abstract"); - kw_assert = &IdentTable.get("assert"); - kw_extends = &IdentTable.get("extends"); - kw_implements = &IdentTable.get("implements"); - kw_instanceof = &IdentTable.get("instanceof"); - kw_interface = &IdentTable.get("interface"); - kw_native = &IdentTable.get("native"); - kw_package = &IdentTable.get("package"); - kw_synchronized = &IdentTable.get("synchronized"); - kw_throws = &IdentTable.get("throws"); - kw___except = &IdentTable.get("__except"); - kw___has_include = &IdentTable.get("__has_include"); - kw___has_include_next = &IdentTable.get("__has_include_next"); - - kw_mark = &IdentTable.get("mark"); - - kw_extend = &IdentTable.get("extend"); - kw_option = &IdentTable.get("option"); - kw_optional = &IdentTable.get("optional"); - kw_repeated = &IdentTable.get("repeated"); - kw_required = &IdentTable.get("required"); - kw_returns = &IdentTable.get("returns"); - - kw_signals = &IdentTable.get("signals"); - kw_qsignals = &IdentTable.get("Q_SIGNALS"); - kw_slots = &IdentTable.get("slots"); - kw_qslots = &IdentTable.get("Q_SLOTS"); - - // Keep this at the end of the constructor to make sure everything here is - // already initialized. - JsExtraKeywords = std::unordered_set<IdentifierInfo *>( - {kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from, - kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly, - kw_set, kw_type, kw_typeof, kw_var, kw_yield, - // Keywords from the Java section. - kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface}); - } - - // Context sensitive keywords. - IdentifierInfo *kw_final; - IdentifierInfo *kw_override; - IdentifierInfo *kw_in; - IdentifierInfo *kw_of; - IdentifierInfo *kw_CF_ENUM; - IdentifierInfo *kw_CF_OPTIONS; - IdentifierInfo *kw_NS_ENUM; - IdentifierInfo *kw_NS_OPTIONS; - IdentifierInfo *kw___except; - IdentifierInfo *kw___has_include; - IdentifierInfo *kw___has_include_next; - - // JavaScript keywords. - IdentifierInfo *kw_as; - IdentifierInfo *kw_async; - IdentifierInfo *kw_await; - IdentifierInfo *kw_declare; - IdentifierInfo *kw_finally; - IdentifierInfo *kw_from; - IdentifierInfo *kw_function; - IdentifierInfo *kw_get; - IdentifierInfo *kw_import; - IdentifierInfo *kw_infer; - IdentifierInfo *kw_is; - IdentifierInfo *kw_let; - IdentifierInfo *kw_module; - IdentifierInfo *kw_readonly; - IdentifierInfo *kw_set; - IdentifierInfo *kw_type; - IdentifierInfo *kw_typeof; - IdentifierInfo *kw_var; - IdentifierInfo *kw_yield; - - // Java keywords. - IdentifierInfo *kw_abstract; - IdentifierInfo *kw_assert; - IdentifierInfo *kw_extends; - IdentifierInfo *kw_implements; - IdentifierInfo *kw_instanceof; - IdentifierInfo *kw_interface; - IdentifierInfo *kw_native; - IdentifierInfo *kw_package; - IdentifierInfo *kw_synchronized; - IdentifierInfo *kw_throws; - - // Pragma keywords. - IdentifierInfo *kw_mark; - - // Proto keywords. - IdentifierInfo *kw_extend; - IdentifierInfo *kw_option; - IdentifierInfo *kw_optional; - IdentifierInfo *kw_repeated; - IdentifierInfo *kw_required; - IdentifierInfo *kw_returns; - - // QT keywords. - IdentifierInfo *kw_signals; - IdentifierInfo *kw_qsignals; - IdentifierInfo *kw_slots; - IdentifierInfo *kw_qslots; - - /// Returns \c true if \p Tok is a true JavaScript identifier, returns - /// \c false if it is a keyword or a pseudo keyword. - bool IsJavaScriptIdentifier(const FormatToken &Tok) const { - return Tok.is(tok::identifier) && - JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) == - JsExtraKeywords.end(); - } - -private: - /// The JavaScript keywords beyond the C++ keyword set. - std::unordered_set<IdentifierInfo *> JsExtraKeywords; -}; - -} // namespace format -} // namespace clang - -#endif diff --git a/gnu/llvm/tools/clang/lib/Format/FormatTokenLexer.cpp b/gnu/llvm/tools/clang/lib/Format/FormatTokenLexer.cpp deleted file mode 100644 index 146f5d68b55..00000000000 --- a/gnu/llvm/tools/clang/lib/Format/FormatTokenLexer.cpp +++ /dev/null @@ -1,725 +0,0 @@ -//===--- FormatTokenLexer.cpp - Lex FormatTokens -------------*- C++ ----*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file implements FormatTokenLexer, which tokenizes a source file -/// into a FormatToken stream suitable for ClangFormat. -/// -//===----------------------------------------------------------------------===// - -#include "FormatTokenLexer.h" -#include "FormatToken.h" -#include "clang/Basic/SourceLocation.h" -#include "clang/Basic/SourceManager.h" -#include "clang/Format/Format.h" -#include "llvm/Support/Regex.h" - -namespace clang { -namespace format { - -FormatTokenLexer::FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, - unsigned Column, const FormatStyle &Style, - encoding::Encoding Encoding) - : FormatTok(nullptr), IsFirstToken(true), StateStack({LexerState::NORMAL}), - Column(Column), TrailingWhitespace(0), SourceMgr(SourceMgr), ID(ID), - Style(Style), IdentTable(getFormattingLangOpts(Style)), - Keywords(IdentTable), Encoding(Encoding), FirstInLineIndex(0), - FormattingDisabled(false), MacroBlockBeginRegex(Style.MacroBlockBegin), - MacroBlockEndRegex(Style.MacroBlockEnd) { - Lex.reset(new Lexer(ID, SourceMgr.getBuffer(ID), SourceMgr, - getFormattingLangOpts(Style))); - Lex->SetKeepWhitespaceMode(true); - - for (const std::string &ForEachMacro : Style.ForEachMacros) - Macros.insert({&IdentTable.get(ForEachMacro), TT_ForEachMacro}); - for (const std::string &StatementMacro : Style.StatementMacros) - Macros.insert({&IdentTable.get(StatementMacro), TT_StatementMacro}); -} - -ArrayRef<FormatToken *> FormatTokenLexer::lex() { - assert(Tokens.empty()); - assert(FirstInLineIndex == 0); - do { - Tokens.push_back(getNextToken()); - if (Style.Language == FormatStyle::LK_JavaScript) { - tryParseJSRegexLiteral(); - handleTemplateStrings(); - } - if (Style.Language == FormatStyle::LK_TextProto) - tryParsePythonComment(); - tryMergePreviousTokens(); - if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline) - FirstInLineIndex = Tokens.size() - 1; - } while (Tokens.back()->Tok.isNot(tok::eof)); - return Tokens; -} - -void FormatTokenLexer::tryMergePreviousTokens() { - if (tryMerge_TMacro()) - return; - if (tryMergeConflictMarkers()) - return; - if (tryMergeLessLess()) - return; - if (tryMergeNSStringLiteral()) - return; - - if (Style.Language == FormatStyle::LK_JavaScript) { - static const tok::TokenKind JSIdentity[] = {tok::equalequal, tok::equal}; - static const tok::TokenKind JSNotIdentity[] = {tok::exclaimequal, - tok::equal}; - static const tok::TokenKind JSShiftEqual[] = {tok::greater, tok::greater, - tok::greaterequal}; - static const tok::TokenKind JSRightArrow[] = {tok::equal, tok::greater}; - static const tok::TokenKind JSExponentiation[] = {tok::star, tok::star}; - static const tok::TokenKind JSExponentiationEqual[] = {tok::star, - tok::starequal}; - - // FIXME: Investigate what token type gives the correct operator priority. - if (tryMergeTokens(JSIdentity, TT_BinaryOperator)) - return; - if (tryMergeTokens(JSNotIdentity, TT_BinaryOperator)) - return; - if (tryMergeTokens(JSShiftEqual, TT_BinaryOperator)) - return; - if (tryMergeTokens(JSRightArrow, TT_JsFatArrow)) - return; - if (tryMergeTokens(JSExponentiation, TT_JsExponentiation)) - return; - if (tryMergeTokens(JSExponentiationEqual, TT_JsExponentiationEqual)) { - Tokens.back()->Tok.setKind(tok::starequal); - return; - } - } - - if (Style.Language == FormatStyle::LK_Java) { - static const tok::TokenKind JavaRightLogicalShiftAssign[] = { - tok::greater, tok::greater, tok::greaterequal}; - if (tryMergeTokens(JavaRightLogicalShiftAssign, TT_BinaryOperator)) - return; - } -} - -bool FormatTokenLexer::tryMergeNSStringLiteral() { - if (Tokens.size() < 2) - return false; - auto &At = *(Tokens.end() - 2); - auto &String = *(Tokens.end() - 1); - if (!At->is(tok::at) || !String->is(tok::string_literal)) - return false; - At->Tok.setKind(tok::string_literal); - At->TokenText = StringRef(At->TokenText.begin(), - String->TokenText.end() - At->TokenText.begin()); - At->ColumnWidth += String->ColumnWidth; - At->Type = TT_ObjCStringLiteral; - Tokens.erase(Tokens.end() - 1); - return true; -} - -bool FormatTokenLexer::tryMergeLessLess() { - // Merge X,less,less,Y into X,lessless,Y unless X or Y is less. - if (Tokens.size() < 3) - return false; - - bool FourthTokenIsLess = false; - if (Tokens.size() > 3) - FourthTokenIsLess = (Tokens.end() - 4)[0]->is(tok::less); - - auto First = Tokens.end() - 3; - if (First[2]->is(tok::less) || First[1]->isNot(tok::less) || - First[0]->isNot(tok::less) || FourthTokenIsLess) - return false; - - // Only merge if there currently is no whitespace between the two "<". - if (First[1]->WhitespaceRange.getBegin() != - First[1]->WhitespaceRange.getEnd()) - return false; - - First[0]->Tok.setKind(tok::lessless); - First[0]->TokenText = "<<"; - First[0]->ColumnWidth += 1; - Tokens.erase(Tokens.end() - 2); - return true; -} - -bool FormatTokenLexer::tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, - TokenType NewType) { - if (Tokens.size() < Kinds.size()) - return false; - - SmallVectorImpl<FormatToken *>::const_iterator First = - Tokens.end() - Kinds.size(); - if (!First[0]->is(Kinds[0])) - return false; - unsigned AddLength = 0; - for (unsigned i = 1; i < Kinds.size(); ++i) { - if (!First[i]->is(Kinds[i]) || First[i]->WhitespaceRange.getBegin() != - First[i]->WhitespaceRange.getEnd()) - return false; - AddLength += First[i]->TokenText.size(); - } - Tokens.resize(Tokens.size() - Kinds.size() + 1); - First[0]->TokenText = StringRef(First[0]->TokenText.data(), - First[0]->TokenText.size() + AddLength); - First[0]->ColumnWidth += AddLength; - First[0]->Type = NewType; - return true; -} - -// Returns \c true if \p Tok can only be followed by an operand in JavaScript. -bool FormatTokenLexer::precedesOperand(FormatToken *Tok) { - // NB: This is not entirely correct, as an r_paren can introduce an operand - // location in e.g. `if (foo) /bar/.exec(...);`. That is a rare enough - // corner case to not matter in practice, though. - return Tok->isOneOf(tok::period, tok::l_paren, tok::comma, tok::l_brace, - tok::r_brace, tok::l_square, tok::semi, tok::exclaim, - tok::colon, tok::question, tok::tilde) || - Tok->isOneOf(tok::kw_return, tok::kw_do, tok::kw_case, tok::kw_throw, - tok::kw_else, tok::kw_new, tok::kw_delete, tok::kw_void, - tok::kw_typeof, Keywords.kw_instanceof, Keywords.kw_in) || - Tok->isBinaryOperator(); -} - -bool FormatTokenLexer::canPrecedeRegexLiteral(FormatToken *Prev) { - if (!Prev) - return true; - - // Regex literals can only follow after prefix unary operators, not after - // postfix unary operators. If the '++' is followed by a non-operand - // introducing token, the slash here is the operand and not the start of a - // regex. - // `!` is an unary prefix operator, but also a post-fix operator that casts - // away nullability, so the same check applies. - if (Prev->isOneOf(tok::plusplus, tok::minusminus, tok::exclaim)) - return (Tokens.size() < 3 || precedesOperand(Tokens[Tokens.size() - 3])); - - // The previous token must introduce an operand location where regex - // literals can occur. - if (!precedesOperand(Prev)) - return false; - - return true; -} - -// Tries to parse a JavaScript Regex literal starting at the current token, -// if that begins with a slash and is in a location where JavaScript allows -// regex literals. Changes the current token to a regex literal and updates -// its text if successful. -void FormatTokenLexer::tryParseJSRegexLiteral() { - FormatToken *RegexToken = Tokens.back(); - if (!RegexToken->isOneOf(tok::slash, tok::slashequal)) - return; - - FormatToken *Prev = nullptr; - for (auto I = Tokens.rbegin() + 1, E = Tokens.rend(); I != E; ++I) { - // NB: Because previous pointers are not initialized yet, this cannot use - // Token.getPreviousNonComment. - if ((*I)->isNot(tok::comment)) { - Prev = *I; - break; - } - } - - if (!canPrecedeRegexLiteral(Prev)) - return; - - // 'Manually' lex ahead in the current file buffer. - const char *Offset = Lex->getBufferLocation(); - const char *RegexBegin = Offset - RegexToken->TokenText.size(); - StringRef Buffer = Lex->getBuffer(); - bool InCharacterClass = false; - bool HaveClosingSlash = false; - for (; !HaveClosingSlash && Offset != Buffer.end(); ++Offset) { - // Regular expressions are terminated with a '/', which can only be - // escaped using '\' or a character class between '[' and ']'. - // See http://www.ecma-international.org/ecma-262/5.1/#sec-7.8.5. - switch (*Offset) { - case '\\': - // Skip the escaped character. - ++Offset; - break; - case '[': - InCharacterClass = true; - break; - case ']': - InCharacterClass = false; - break; - case '/': - if (!InCharacterClass) - HaveClosingSlash = true; - break; - } - } - - RegexToken->Type = TT_RegexLiteral; - // Treat regex literals like other string_literals. - RegexToken->Tok.setKind(tok::string_literal); - RegexToken->TokenText = StringRef(RegexBegin, Offset - RegexBegin); - RegexToken->ColumnWidth = RegexToken->TokenText.size(); - - resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset))); -} - -void FormatTokenLexer::handleTemplateStrings() { - FormatToken *BacktickToken = Tokens.back(); - - if (BacktickToken->is(tok::l_brace)) { - StateStack.push(LexerState::NORMAL); - return; - } - if (BacktickToken->is(tok::r_brace)) { - if (StateStack.size() == 1) - return; - StateStack.pop(); - if (StateStack.top() != LexerState::TEMPLATE_STRING) - return; - // If back in TEMPLATE_STRING, fallthrough and continue parsing the - } else if (BacktickToken->is(tok::unknown) && - BacktickToken->TokenText == "`") { - StateStack.push(LexerState::TEMPLATE_STRING); - } else { - return; // Not actually a template - } - - // 'Manually' lex ahead in the current file buffer. - const char *Offset = Lex->getBufferLocation(); - const char *TmplBegin = Offset - BacktickToken->TokenText.size(); // at "`" - for (; Offset != Lex->getBuffer().end(); ++Offset) { - if (Offset[0] == '`') { - StateStack.pop(); - break; - } - if (Offset[0] == '\\') { - ++Offset; // Skip the escaped character. - } else if (Offset + 1 < Lex->getBuffer().end() && Offset[0] == '$' && - Offset[1] == '{') { - // '${' introduces an expression interpolation in the template string. - StateStack.push(LexerState::NORMAL); - ++Offset; - break; - } - } - - StringRef LiteralText(TmplBegin, Offset - TmplBegin + 1); - BacktickToken->Type = TT_TemplateString; - BacktickToken->Tok.setKind(tok::string_literal); - BacktickToken->TokenText = LiteralText; - - // Adjust width for potentially multiline string literals. - size_t FirstBreak = LiteralText.find('\n'); - StringRef FirstLineText = FirstBreak == StringRef::npos - ? LiteralText - : LiteralText.substr(0, FirstBreak); - BacktickToken->ColumnWidth = encoding::columnWidthWithTabs( - FirstLineText, BacktickToken->OriginalColumn, Style.TabWidth, Encoding); - size_t LastBreak = LiteralText.rfind('\n'); - if (LastBreak != StringRef::npos) { - BacktickToken->IsMultiline = true; - unsigned StartColumn = 0; // The template tail spans the entire line. - BacktickToken->LastLineColumnWidth = encoding::columnWidthWithTabs( - LiteralText.substr(LastBreak + 1, LiteralText.size()), StartColumn, - Style.TabWidth, Encoding); - } - - SourceLocation loc = Offset < Lex->getBuffer().end() - ? Lex->getSourceLocation(Offset + 1) - : SourceMgr.getLocForEndOfFile(ID); - resetLexer(SourceMgr.getFileOffset(loc)); -} - -void FormatTokenLexer::tryParsePythonComment() { - FormatToken *HashToken = Tokens.back(); - if (!HashToken->isOneOf(tok::hash, tok::hashhash)) - return; - // Turn the remainder of this line into a comment. - const char *CommentBegin = - Lex->getBufferLocation() - HashToken->TokenText.size(); // at "#" - size_t From = CommentBegin - Lex->getBuffer().begin(); - size_t To = Lex->getBuffer().find_first_of('\n', From); - if (To == StringRef::npos) - To = Lex->getBuffer().size(); - size_t Len = To - From; - HashToken->Type = TT_LineComment; - HashToken->Tok.setKind(tok::comment); - HashToken->TokenText = Lex->getBuffer().substr(From, Len); - SourceLocation Loc = To < Lex->getBuffer().size() - ? Lex->getSourceLocation(CommentBegin + Len) - : SourceMgr.getLocForEndOfFile(ID); - resetLexer(SourceMgr.getFileOffset(Loc)); -} - -bool FormatTokenLexer::tryMerge_TMacro() { - if (Tokens.size() < 4) - return false; - FormatToken *Last = Tokens.back(); - if (!Last->is(tok::r_paren)) - return false; - - FormatToken *String = Tokens[Tokens.size() - 2]; - if (!String->is(tok::string_literal) || String->IsMultiline) - return false; - - if (!Tokens[Tokens.size() - 3]->is(tok::l_paren)) - return false; - - FormatToken *Macro = Tokens[Tokens.size() - 4]; - if (Macro->TokenText != "_T") - return false; - - const char *Start = Macro->TokenText.data(); - const char *End = Last->TokenText.data() + Last->TokenText.size(); - String->TokenText = StringRef(Start, End - Start); - String->IsFirst = Macro->IsFirst; - String->LastNewlineOffset = Macro->LastNewlineOffset; - String->WhitespaceRange = Macro->WhitespaceRange; - String->OriginalColumn = Macro->OriginalColumn; - String->ColumnWidth = encoding::columnWidthWithTabs( - String->TokenText, String->OriginalColumn, Style.TabWidth, Encoding); - String->NewlinesBefore = Macro->NewlinesBefore; - String->HasUnescapedNewline = Macro->HasUnescapedNewline; - - Tokens.pop_back(); - Tokens.pop_back(); - Tokens.pop_back(); - Tokens.back() = String; - return true; -} - -bool FormatTokenLexer::tryMergeConflictMarkers() { - if (Tokens.back()->NewlinesBefore == 0 && Tokens.back()->isNot(tok::eof)) - return false; - - // Conflict lines look like: - // <marker> <text from the vcs> - // For example: - // >>>>>>> /file/in/file/system at revision 1234 - // - // We merge all tokens in a line that starts with a conflict marker - // into a single token with a special token type that the unwrapped line - // parser will use to correctly rebuild the underlying code. - - FileID ID; - // Get the position of the first token in the line. - unsigned FirstInLineOffset; - std::tie(ID, FirstInLineOffset) = SourceMgr.getDecomposedLoc( - Tokens[FirstInLineIndex]->getStartOfNonWhitespace()); - StringRef Buffer = SourceMgr.getBuffer(ID)->getBuffer(); - // Calculate the offset of the start of the current line. - auto LineOffset = Buffer.rfind('\n', FirstInLineOffset); - if (LineOffset == StringRef::npos) { - LineOffset = 0; - } else { - ++LineOffset; - } - - auto FirstSpace = Buffer.find_first_of(" \n", LineOffset); - StringRef LineStart; - if (FirstSpace == StringRef::npos) { - LineStart = Buffer.substr(LineOffset); - } else { - LineStart = Buffer.substr(LineOffset, FirstSpace - LineOffset); - } - - TokenType Type = TT_Unknown; - if (LineStart == "<<<<<<<" || LineStart == ">>>>") { - Type = TT_ConflictStart; - } else if (LineStart == "|||||||" || LineStart == "=======" || - LineStart == "====") { - Type = TT_ConflictAlternative; - } else if (LineStart == ">>>>>>>" || LineStart == "<<<<") { - Type = TT_ConflictEnd; - } - - if (Type != TT_Unknown) { - FormatToken *Next = Tokens.back(); - - Tokens.resize(FirstInLineIndex + 1); - // We do not need to build a complete token here, as we will skip it - // during parsing anyway (as we must not touch whitespace around conflict - // markers). - Tokens.back()->Type = Type; - Tokens.back()->Tok.setKind(tok::kw___unknown_anytype); - - Tokens.push_back(Next); - return true; - } - - return false; -} - -FormatToken *FormatTokenLexer::getStashedToken() { - // Create a synthesized second '>' or '<' token. - Token Tok = FormatTok->Tok; - StringRef TokenText = FormatTok->TokenText; - - unsigned OriginalColumn = FormatTok->OriginalColumn; - FormatTok = new (Allocator.Allocate()) FormatToken; - FormatTok->Tok = Tok; - SourceLocation TokLocation = - FormatTok->Tok.getLocation().getLocWithOffset(Tok.getLength() - 1); - FormatTok->Tok.setLocation(TokLocation); - FormatTok->WhitespaceRange = SourceRange(TokLocation, TokLocation); - FormatTok->TokenText = TokenText; - FormatTok->ColumnWidth = 1; - FormatTok->OriginalColumn = OriginalColumn + 1; - - return FormatTok; -} - -FormatToken *FormatTokenLexer::getNextToken() { - if (StateStack.top() == LexerState::TOKEN_STASHED) { - StateStack.pop(); - return getStashedToken(); - } - - FormatTok = new (Allocator.Allocate()) FormatToken; - readRawToken(*FormatTok); - SourceLocation WhitespaceStart = - FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace); - FormatTok->IsFirst = IsFirstToken; - IsFirstToken = false; - - // Consume and record whitespace until we find a significant token. - unsigned WhitespaceLength = TrailingWhitespace; - while (FormatTok->Tok.is(tok::unknown)) { - StringRef Text = FormatTok->TokenText; - auto EscapesNewline = [&](int pos) { - // A '\r' here is just part of '\r\n'. Skip it. - if (pos >= 0 && Text[pos] == '\r') - --pos; - // See whether there is an odd number of '\' before this. - // FIXME: This is wrong. A '\' followed by a newline is always removed, - // regardless of whether there is another '\' before it. - // FIXME: Newlines can also be escaped by a '?' '?' '/' trigraph. - unsigned count = 0; - for (; pos >= 0; --pos, ++count) - if (Text[pos] != '\\') - break; - return count & 1; - }; - // FIXME: This miscounts tok:unknown tokens that are not just - // whitespace, e.g. a '`' character. - for (int i = 0, e = Text.size(); i != e; ++i) { - switch (Text[i]) { - case '\n': - ++FormatTok->NewlinesBefore; - FormatTok->HasUnescapedNewline = !EscapesNewline(i - 1); - FormatTok->LastNewlineOffset = WhitespaceLength + i + 1; - Column = 0; - break; - case '\r': - FormatTok->LastNewlineOffset = WhitespaceLength + i + 1; - Column = 0; - break; - case '\f': - case '\v': - Column = 0; - break; - case ' ': - ++Column; - break; - case '\t': - Column += Style.TabWidth - Column % Style.TabWidth; - break; - case '\\': - if (i + 1 == e || (Text[i + 1] != '\r' && Text[i + 1] != '\n')) - FormatTok->Type = TT_ImplicitStringLiteral; - break; - default: - FormatTok->Type = TT_ImplicitStringLiteral; - break; - } - if (FormatTok->Type == TT_ImplicitStringLiteral) - break; - } - - if (FormatTok->is(TT_ImplicitStringLiteral)) - break; - WhitespaceLength += FormatTok->Tok.getLength(); - - readRawToken(*FormatTok); - } - - // JavaScript and Java do not allow to escape the end of the line with a - // backslash. Backslashes are syntax errors in plain source, but can occur in - // comments. When a single line comment ends with a \, it'll cause the next - // line of code to be lexed as a comment, breaking formatting. The code below - // finds comments that contain a backslash followed by a line break, truncates - // the comment token at the backslash, and resets the lexer to restart behind - // the backslash. - if ((Style.Language == FormatStyle::LK_JavaScript || - Style.Language == FormatStyle::LK_Java) && - FormatTok->is(tok::comment) && FormatTok->TokenText.startswith("//")) { - size_t BackslashPos = FormatTok->TokenText.find('\\'); - while (BackslashPos != StringRef::npos) { - if (BackslashPos + 1 < FormatTok->TokenText.size() && - FormatTok->TokenText[BackslashPos + 1] == '\n') { - const char *Offset = Lex->getBufferLocation(); - Offset -= FormatTok->TokenText.size(); - Offset += BackslashPos + 1; - resetLexer(SourceMgr.getFileOffset(Lex->getSourceLocation(Offset))); - FormatTok->TokenText = FormatTok->TokenText.substr(0, BackslashPos + 1); - FormatTok->ColumnWidth = encoding::columnWidthWithTabs( - FormatTok->TokenText, FormatTok->OriginalColumn, Style.TabWidth, - Encoding); - break; - } - BackslashPos = FormatTok->TokenText.find('\\', BackslashPos + 1); - } - } - - // In case the token starts with escaped newlines, we want to - // take them into account as whitespace - this pattern is quite frequent - // in macro definitions. - // FIXME: Add a more explicit test. - while (FormatTok->TokenText.size() > 1 && FormatTok->TokenText[0] == '\\') { - unsigned SkippedWhitespace = 0; - if (FormatTok->TokenText.size() > 2 && - (FormatTok->TokenText[1] == '\r' && FormatTok->TokenText[2] == '\n')) - SkippedWhitespace = 3; - else if (FormatTok->TokenText[1] == '\n') - SkippedWhitespace = 2; - else - break; - - ++FormatTok->NewlinesBefore; - WhitespaceLength += SkippedWhitespace; - FormatTok->LastNewlineOffset = SkippedWhitespace; - Column = 0; - FormatTok->TokenText = FormatTok->TokenText.substr(SkippedWhitespace); - } - - FormatTok->WhitespaceRange = SourceRange( - WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength)); - - FormatTok->OriginalColumn = Column; - - TrailingWhitespace = 0; - if (FormatTok->Tok.is(tok::comment)) { - // FIXME: Add the trimmed whitespace to Column. - StringRef UntrimmedText = FormatTok->TokenText; - FormatTok->TokenText = FormatTok->TokenText.rtrim(" \t\v\f"); - TrailingWhitespace = UntrimmedText.size() - FormatTok->TokenText.size(); - } else if (FormatTok->Tok.is(tok::raw_identifier)) { - IdentifierInfo &Info = IdentTable.get(FormatTok->TokenText); - FormatTok->Tok.setIdentifierInfo(&Info); - FormatTok->Tok.setKind(Info.getTokenID()); - if (Style.Language == FormatStyle::LK_Java && - FormatTok->isOneOf(tok::kw_struct, tok::kw_union, tok::kw_delete, - tok::kw_operator)) { - FormatTok->Tok.setKind(tok::identifier); - FormatTok->Tok.setIdentifierInfo(nullptr); - } else if (Style.Language == FormatStyle::LK_JavaScript && - FormatTok->isOneOf(tok::kw_struct, tok::kw_union, - tok::kw_operator)) { - FormatTok->Tok.setKind(tok::identifier); - FormatTok->Tok.setIdentifierInfo(nullptr); - } - } else if (FormatTok->Tok.is(tok::greatergreater)) { - FormatTok->Tok.setKind(tok::greater); - FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); - ++Column; - StateStack.push(LexerState::TOKEN_STASHED); - } else if (FormatTok->Tok.is(tok::lessless)) { - FormatTok->Tok.setKind(tok::less); - FormatTok->TokenText = FormatTok->TokenText.substr(0, 1); - ++Column; - StateStack.push(LexerState::TOKEN_STASHED); - } - - // Now FormatTok is the next non-whitespace token. - - StringRef Text = FormatTok->TokenText; - size_t FirstNewlinePos = Text.find('\n'); - if (FirstNewlinePos == StringRef::npos) { - // FIXME: ColumnWidth actually depends on the start column, we need to - // take this into account when the token is moved. - FormatTok->ColumnWidth = - encoding::columnWidthWithTabs(Text, Column, Style.TabWidth, Encoding); - Column += FormatTok->ColumnWidth; - } else { - FormatTok->IsMultiline = true; - // FIXME: ColumnWidth actually depends on the start column, we need to - // take this into account when the token is moved. - FormatTok->ColumnWidth = encoding::columnWidthWithTabs( - Text.substr(0, FirstNewlinePos), Column, Style.TabWidth, Encoding); - - // The last line of the token always starts in column 0. - // Thus, the length can be precomputed even in the presence of tabs. - FormatTok->LastLineColumnWidth = encoding::columnWidthWithTabs( - Text.substr(Text.find_last_of('\n') + 1), 0, Style.TabWidth, Encoding); - Column = FormatTok->LastLineColumnWidth; - } - - if (Style.isCpp()) { - auto it = Macros.find(FormatTok->Tok.getIdentifierInfo()); - if (!(Tokens.size() > 0 && Tokens.back()->Tok.getIdentifierInfo() && - Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() == - tok::pp_define) && - it != Macros.end()) { - FormatTok->Type = it->second; - } else if (FormatTok->is(tok::identifier)) { - if (MacroBlockBeginRegex.match(Text)) { - FormatTok->Type = TT_MacroBlockBegin; - } else if (MacroBlockEndRegex.match(Text)) { - FormatTok->Type = TT_MacroBlockEnd; - } - } - } - - return FormatTok; -} - -void FormatTokenLexer::readRawToken(FormatToken &Tok) { - Lex->LexFromRawLexer(Tok.Tok); - Tok.TokenText = StringRef(SourceMgr.getCharacterData(Tok.Tok.getLocation()), - Tok.Tok.getLength()); - // For formatting, treat unterminated string literals like normal string - // literals. - if (Tok.is(tok::unknown)) { - if (!Tok.TokenText.empty() && Tok.TokenText[0] == '"') { - Tok.Tok.setKind(tok::string_literal); - Tok.IsUnterminatedLiteral = true; - } else if (Style.Language == FormatStyle::LK_JavaScript && - Tok.TokenText == "''") { - Tok.Tok.setKind(tok::string_literal); - } - } - - if ((Style.Language == FormatStyle::LK_JavaScript || - Style.Language == FormatStyle::LK_Proto || - Style.Language == FormatStyle::LK_TextProto) && - Tok.is(tok::char_constant)) { - Tok.Tok.setKind(tok::string_literal); - } - - if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format on" || - Tok.TokenText == "/* clang-format on */")) { - FormattingDisabled = false; - } - - Tok.Finalized = FormattingDisabled; - - if (Tok.is(tok::comment) && (Tok.TokenText == "// clang-format off" || - Tok.TokenText == "/* clang-format off */")) { - FormattingDisabled = true; - } -} - -void FormatTokenLexer::resetLexer(unsigned Offset) { - StringRef Buffer = SourceMgr.getBufferData(ID); - Lex.reset(new Lexer(SourceMgr.getLocForStartOfFile(ID), - getFormattingLangOpts(Style), Buffer.begin(), - Buffer.begin() + Offset, Buffer.end())); - Lex->SetKeepWhitespaceMode(true); - TrailingWhitespace = 0; -} - -} // namespace format -} // namespace clang diff --git a/gnu/llvm/tools/clang/lib/Format/FormatTokenLexer.h b/gnu/llvm/tools/clang/lib/Format/FormatTokenLexer.h deleted file mode 100644 index 0cf357c85f3..00000000000 --- a/gnu/llvm/tools/clang/lib/Format/FormatTokenLexer.h +++ /dev/null @@ -1,119 +0,0 @@ -//===--- FormatTokenLexer.h - Format C++ code ----------------*- C++ ----*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file contains FormatTokenLexer, which tokenizes a source file -/// into a token stream suitable for ClangFormat. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H -#define LLVM_CLANG_LIB_FORMAT_FORMATTOKENLEXER_H - -#include "Encoding.h" -#include "FormatToken.h" -#include "clang/Basic/SourceLocation.h" -#include "clang/Basic/SourceManager.h" -#include "clang/Format/Format.h" -#include "llvm/Support/Regex.h" -#include "llvm/ADT/MapVector.h" - -#include <stack> - -namespace clang { -namespace format { - -enum LexerState { - NORMAL, - TEMPLATE_STRING, - TOKEN_STASHED, -}; - -class FormatTokenLexer { -public: - FormatTokenLexer(const SourceManager &SourceMgr, FileID ID, unsigned Column, - const FormatStyle &Style, encoding::Encoding Encoding); - - ArrayRef<FormatToken *> lex(); - - const AdditionalKeywords &getKeywords() { return Keywords; } - -private: - void tryMergePreviousTokens(); - - bool tryMergeLessLess(); - bool tryMergeNSStringLiteral(); - - bool tryMergeTokens(ArrayRef<tok::TokenKind> Kinds, TokenType NewType); - - // Returns \c true if \p Tok can only be followed by an operand in JavaScript. - bool precedesOperand(FormatToken *Tok); - - bool canPrecedeRegexLiteral(FormatToken *Prev); - - // Tries to parse a JavaScript Regex literal starting at the current token, - // if that begins with a slash and is in a location where JavaScript allows - // regex literals. Changes the current token to a regex literal and updates - // its text if successful. - void tryParseJSRegexLiteral(); - - // Handles JavaScript template strings. - // - // JavaScript template strings use backticks ('`') as delimiters, and allow - // embedding expressions nested in ${expr-here}. Template strings can be - // nested recursively, i.e. expressions can contain template strings in turn. - // - // The code below parses starting from a backtick, up to a closing backtick or - // an opening ${. It also maintains a stack of lexing contexts to handle - // nested template parts by balancing curly braces. - void handleTemplateStrings(); - - void tryParsePythonComment(); - - bool tryMerge_TMacro(); - - bool tryMergeConflictMarkers(); - - FormatToken *getStashedToken(); - - FormatToken *getNextToken(); - - FormatToken *FormatTok; - bool IsFirstToken; - std::stack<LexerState> StateStack; - unsigned Column; - unsigned TrailingWhitespace; - std::unique_ptr<Lexer> Lex; - const SourceManager &SourceMgr; - FileID ID; - const FormatStyle &Style; - IdentifierTable IdentTable; - AdditionalKeywords Keywords; - encoding::Encoding Encoding; - llvm::SpecificBumpPtrAllocator<FormatToken> Allocator; - // Index (in 'Tokens') of the last token that starts a new line. - unsigned FirstInLineIndex; - SmallVector<FormatToken *, 16> Tokens; - - llvm::SmallMapVector<IdentifierInfo *, TokenType, 8> Macros; - - bool FormattingDisabled; - - llvm::Regex MacroBlockBeginRegex; - llvm::Regex MacroBlockEndRegex; - - void readRawToken(FormatToken &Tok); - - void resetLexer(unsigned Offset); -}; - -} // namespace format -} // namespace clang - -#endif diff --git a/gnu/llvm/tools/clang/lib/Format/NamespaceEndCommentsFixer.cpp b/gnu/llvm/tools/clang/lib/Format/NamespaceEndCommentsFixer.cpp deleted file mode 100644 index dd364866d1c..00000000000 --- a/gnu/llvm/tools/clang/lib/Format/NamespaceEndCommentsFixer.cpp +++ /dev/null @@ -1,207 +0,0 @@ -//===--- NamespaceEndCommentsFixer.cpp --------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file implements NamespaceEndCommentsFixer, a TokenAnalyzer that -/// fixes namespace end comments. -/// -//===----------------------------------------------------------------------===// - -#include "NamespaceEndCommentsFixer.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Regex.h" - -#define DEBUG_TYPE "namespace-end-comments-fixer" - -namespace clang { -namespace format { - -namespace { -// The maximal number of unwrapped lines that a short namespace spans. -// Short namespaces don't need an end comment. -static const int kShortNamespaceMaxLines = 1; - -// Computes the name of a namespace given the namespace token. -// Returns "" for anonymous namespace. -std::string computeName(const FormatToken *NamespaceTok) { - assert(NamespaceTok && NamespaceTok->is(tok::kw_namespace) && - "expecting a namespace token"); - std::string name = ""; - // Collects all the non-comment tokens between 'namespace' and '{'. - const FormatToken *Tok = NamespaceTok->getNextNonComment(); - while (Tok && !Tok->is(tok::l_brace)) { - name += Tok->TokenText; - Tok = Tok->getNextNonComment(); - } - return name; -} - -std::string computeEndCommentText(StringRef NamespaceName, bool AddNewline) { - std::string text = "// namespace"; - if (!NamespaceName.empty()) { - text += ' '; - text += NamespaceName; - } - if (AddNewline) - text += '\n'; - return text; -} - -bool hasEndComment(const FormatToken *RBraceTok) { - return RBraceTok->Next && RBraceTok->Next->is(tok::comment); -} - -bool validEndComment(const FormatToken *RBraceTok, StringRef NamespaceName) { - assert(hasEndComment(RBraceTok)); - const FormatToken *Comment = RBraceTok->Next; - - // Matches a valid namespace end comment. - // Valid namespace end comments don't need to be edited. - static llvm::Regex *const NamespaceCommentPattern = - new llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *" - "namespace( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$", - llvm::Regex::IgnoreCase); - SmallVector<StringRef, 7> Groups; - if (NamespaceCommentPattern->match(Comment->TokenText, &Groups)) { - StringRef NamespaceNameInComment = Groups.size() > 5 ? Groups[5] : ""; - // Anonymous namespace comments must not mention a namespace name. - if (NamespaceName.empty() && !NamespaceNameInComment.empty()) - return false; - StringRef AnonymousInComment = Groups.size() > 3 ? Groups[3] : ""; - // Named namespace comments must not mention anonymous namespace. - if (!NamespaceName.empty() && !AnonymousInComment.empty()) - return false; - return NamespaceNameInComment == NamespaceName; - } - return false; -} - -void addEndComment(const FormatToken *RBraceTok, StringRef EndCommentText, - const SourceManager &SourceMgr, - tooling::Replacements *Fixes) { - auto EndLoc = RBraceTok->Tok.getEndLoc(); - auto Range = CharSourceRange::getCharRange(EndLoc, EndLoc); - auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText)); - if (Err) { - llvm::errs() << "Error while adding namespace end comment: " - << llvm::toString(std::move(Err)) << "\n"; - } -} - -void updateEndComment(const FormatToken *RBraceTok, StringRef EndCommentText, - const SourceManager &SourceMgr, - tooling::Replacements *Fixes) { - assert(hasEndComment(RBraceTok)); - const FormatToken *Comment = RBraceTok->Next; - auto Range = CharSourceRange::getCharRange(Comment->getStartOfNonWhitespace(), - Comment->Tok.getEndLoc()); - auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText)); - if (Err) { - llvm::errs() << "Error while updating namespace end comment: " - << llvm::toString(std::move(Err)) << "\n"; - } -} -} // namespace - -const FormatToken * -getNamespaceToken(const AnnotatedLine *Line, - const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { - if (!Line->Affected || Line->InPPDirective || !Line->startsWith(tok::r_brace)) - return nullptr; - size_t StartLineIndex = Line->MatchingOpeningBlockLineIndex; - if (StartLineIndex == UnwrappedLine::kInvalidIndex) - return nullptr; - assert(StartLineIndex < AnnotatedLines.size()); - const FormatToken *NamespaceTok = AnnotatedLines[StartLineIndex]->First; - if (NamespaceTok->is(tok::l_brace)) { - // "namespace" keyword can be on the line preceding '{', e.g. in styles - // where BraceWrapping.AfterNamespace is true. - if (StartLineIndex > 0) - NamespaceTok = AnnotatedLines[StartLineIndex - 1]->First; - } - return NamespaceTok->getNamespaceToken(); -} - -NamespaceEndCommentsFixer::NamespaceEndCommentsFixer(const Environment &Env, - const FormatStyle &Style) - : TokenAnalyzer(Env, Style) {} - -std::pair<tooling::Replacements, unsigned> NamespaceEndCommentsFixer::analyze( - TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, - FormatTokenLexer &Tokens) { - const SourceManager &SourceMgr = Env.getSourceManager(); - AffectedRangeMgr.computeAffectedLines(AnnotatedLines); - tooling::Replacements Fixes; - std::string AllNamespaceNames = ""; - size_t StartLineIndex = SIZE_MAX; - unsigned int CompactedNamespacesCount = 0; - for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) { - const AnnotatedLine *EndLine = AnnotatedLines[I]; - const FormatToken *NamespaceTok = - getNamespaceToken(EndLine, AnnotatedLines); - if (!NamespaceTok) - continue; - FormatToken *RBraceTok = EndLine->First; - if (RBraceTok->Finalized) - continue; - RBraceTok->Finalized = true; - const FormatToken *EndCommentPrevTok = RBraceTok; - // Namespaces often end with '};'. In that case, attach namespace end - // comments to the semicolon tokens. - if (RBraceTok->Next && RBraceTok->Next->is(tok::semi)) { - EndCommentPrevTok = RBraceTok->Next; - } - if (StartLineIndex == SIZE_MAX) - StartLineIndex = EndLine->MatchingOpeningBlockLineIndex; - std::string NamespaceName = computeName(NamespaceTok); - if (Style.CompactNamespaces) { - if ((I + 1 < E) && - getNamespaceToken(AnnotatedLines[I + 1], AnnotatedLines) && - StartLineIndex - CompactedNamespacesCount - 1 == - AnnotatedLines[I + 1]->MatchingOpeningBlockLineIndex && - !AnnotatedLines[I + 1]->First->Finalized) { - if (hasEndComment(EndCommentPrevTok)) { - // remove end comment, it will be merged in next one - updateEndComment(EndCommentPrevTok, std::string(), SourceMgr, &Fixes); - } - CompactedNamespacesCount++; - AllNamespaceNames = "::" + NamespaceName + AllNamespaceNames; - continue; - } - NamespaceName += AllNamespaceNames; - CompactedNamespacesCount = 0; - AllNamespaceNames = std::string(); - } - // The next token in the token stream after the place where the end comment - // token must be. This is either the next token on the current line or the - // first token on the next line. - const FormatToken *EndCommentNextTok = EndCommentPrevTok->Next; - if (EndCommentNextTok && EndCommentNextTok->is(tok::comment)) - EndCommentNextTok = EndCommentNextTok->Next; - if (!EndCommentNextTok && I + 1 < E) - EndCommentNextTok = AnnotatedLines[I + 1]->First; - bool AddNewline = EndCommentNextTok && - EndCommentNextTok->NewlinesBefore == 0 && - EndCommentNextTok->isNot(tok::eof); - const std::string EndCommentText = - computeEndCommentText(NamespaceName, AddNewline); - if (!hasEndComment(EndCommentPrevTok)) { - bool isShort = I - StartLineIndex <= kShortNamespaceMaxLines + 1; - if (!isShort) - addEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes); - } else if (!validEndComment(EndCommentPrevTok, NamespaceName)) { - updateEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes); - } - StartLineIndex = SIZE_MAX; - } - return {Fixes, 0}; -} - -} // namespace format -} // namespace clang diff --git a/gnu/llvm/tools/clang/lib/Format/NamespaceEndCommentsFixer.h b/gnu/llvm/tools/clang/lib/Format/NamespaceEndCommentsFixer.h deleted file mode 100644 index 07a1c7bb0c3..00000000000 --- a/gnu/llvm/tools/clang/lib/Format/NamespaceEndCommentsFixer.h +++ /dev/null @@ -1,47 +0,0 @@ -//===--- NamespaceEndCommentsFixer.h ----------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file declares NamespaceEndCommentsFixer, a TokenAnalyzer that -/// fixes namespace end comments. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LIB_FORMAT_NAMESPACEENDCOMMENTSFIXER_H -#define LLVM_CLANG_LIB_FORMAT_NAMESPACEENDCOMMENTSFIXER_H - -#include "TokenAnalyzer.h" - -namespace clang { -namespace format { - -// Finds the namespace token corresponding to a closing namespace `}`, if that -// is to be formatted. -// If \p Line contains the closing `}` of a namespace, is affected and is not in -// a preprocessor directive, the result will be the matching namespace token. -// Otherwise returns null. -// \p AnnotatedLines is the sequence of lines from which \p Line is a member of. -const FormatToken * -getNamespaceToken(const AnnotatedLine *Line, - const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines); - -class NamespaceEndCommentsFixer : public TokenAnalyzer { -public: - NamespaceEndCommentsFixer(const Environment &Env, const FormatStyle &Style); - - std::pair<tooling::Replacements, unsigned> - analyze(TokenAnnotator &Annotator, - SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, - FormatTokenLexer &Tokens) override; -}; - -} // end namespace format -} // end namespace clang - -#endif diff --git a/gnu/llvm/tools/clang/lib/Format/SortJavaScriptImports.cpp b/gnu/llvm/tools/clang/lib/Format/SortJavaScriptImports.cpp deleted file mode 100644 index 2ec577382ff..00000000000 --- a/gnu/llvm/tools/clang/lib/Format/SortJavaScriptImports.cpp +++ /dev/null @@ -1,454 +0,0 @@ -//===--- SortJavaScriptImports.cpp - Sort ES6 Imports -----------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file implements a sort operation for JavaScript ES6 imports. -/// -//===----------------------------------------------------------------------===// - -#include "SortJavaScriptImports.h" -#include "TokenAnalyzer.h" -#include "TokenAnnotator.h" -#include "clang/Basic/Diagnostic.h" -#include "clang/Basic/DiagnosticOptions.h" -#include "clang/Basic/LLVM.h" -#include "clang/Basic/SourceLocation.h" -#include "clang/Basic/SourceManager.h" -#include "clang/Format/Format.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Debug.h" -#include <algorithm> -#include <string> - -#define DEBUG_TYPE "format-formatter" - -namespace clang { -namespace format { - -class FormatTokenLexer; - -using clang::format::FormatStyle; - -// An imported symbol in a JavaScript ES6 import/export, possibly aliased. -struct JsImportedSymbol { - StringRef Symbol; - StringRef Alias; - SourceRange Range; - - bool operator==(const JsImportedSymbol &RHS) const { - // Ignore Range for comparison, it is only used to stitch code together, - // but imports at different code locations are still conceptually the same. - return Symbol == RHS.Symbol && Alias == RHS.Alias; - } -}; - -// An ES6 module reference. -// -// ES6 implements a module system, where individual modules (~= source files) -// can reference other modules, either importing symbols from them, or exporting -// symbols from them: -// import {foo} from 'foo'; -// export {foo}; -// export {bar} from 'bar'; -// -// `export`s with URLs are syntactic sugar for an import of the symbol from the -// URL, followed by an export of the symbol, allowing this code to treat both -// statements more or less identically, with the exception being that `export`s -// are sorted last. -// -// imports and exports support individual symbols, but also a wildcard syntax: -// import * as prefix from 'foo'; -// export * from 'bar'; -// -// This struct represents both exports and imports to build up the information -// required for sorting module references. -struct JsModuleReference { - bool IsExport = false; - // Module references are sorted into these categories, in order. - enum ReferenceCategory { - SIDE_EFFECT, // "import 'something';" - ABSOLUTE, // from 'something' - RELATIVE_PARENT, // from '../*' - RELATIVE, // from './*' - }; - ReferenceCategory Category = ReferenceCategory::SIDE_EFFECT; - // The URL imported, e.g. `import .. from 'url';`. Empty for `export {a, b};`. - StringRef URL; - // Prefix from "import * as prefix". Empty for symbol imports and `export *`. - // Implies an empty names list. - StringRef Prefix; - // Symbols from `import {SymbolA, SymbolB, ...} from ...;`. - SmallVector<JsImportedSymbol, 1> Symbols; - // Textual position of the import/export, including preceding and trailing - // comments. - SourceRange Range; -}; - -bool operator<(const JsModuleReference &LHS, const JsModuleReference &RHS) { - if (LHS.IsExport != RHS.IsExport) - return LHS.IsExport < RHS.IsExport; - if (LHS.Category != RHS.Category) - return LHS.Category < RHS.Category; - if (LHS.Category == JsModuleReference::ReferenceCategory::SIDE_EFFECT) - // Side effect imports might be ordering sensitive. Consider them equal so - // that they maintain their relative order in the stable sort below. - // This retains transitivity because LHS.Category == RHS.Category here. - return false; - // Empty URLs sort *last* (for export {...};). - if (LHS.URL.empty() != RHS.URL.empty()) - return LHS.URL.empty() < RHS.URL.empty(); - if (int Res = LHS.URL.compare_lower(RHS.URL)) - return Res < 0; - // '*' imports (with prefix) sort before {a, b, ...} imports. - if (LHS.Prefix.empty() != RHS.Prefix.empty()) - return LHS.Prefix.empty() < RHS.Prefix.empty(); - if (LHS.Prefix != RHS.Prefix) - return LHS.Prefix > RHS.Prefix; - return false; -} - -// JavaScriptImportSorter sorts JavaScript ES6 imports and exports. It is -// implemented as a TokenAnalyzer because ES6 imports have substantial syntactic -// structure, making it messy to sort them using regular expressions. -class JavaScriptImportSorter : public TokenAnalyzer { -public: - JavaScriptImportSorter(const Environment &Env, const FormatStyle &Style) - : TokenAnalyzer(Env, Style), - FileContents(Env.getSourceManager().getBufferData(Env.getFileID())) {} - - std::pair<tooling::Replacements, unsigned> - analyze(TokenAnnotator &Annotator, - SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, - FormatTokenLexer &Tokens) override { - tooling::Replacements Result; - AffectedRangeMgr.computeAffectedLines(AnnotatedLines); - - const AdditionalKeywords &Keywords = Tokens.getKeywords(); - SmallVector<JsModuleReference, 16> References; - AnnotatedLine *FirstNonImportLine; - std::tie(References, FirstNonImportLine) = - parseModuleReferences(Keywords, AnnotatedLines); - - if (References.empty()) - return {Result, 0}; - - SmallVector<unsigned, 16> Indices; - for (unsigned i = 0, e = References.size(); i != e; ++i) - Indices.push_back(i); - std::stable_sort(Indices.begin(), Indices.end(), - [&](unsigned LHSI, unsigned RHSI) { - return References[LHSI] < References[RHSI]; - }); - bool ReferencesInOrder = std::is_sorted(Indices.begin(), Indices.end()); - - std::string ReferencesText; - bool SymbolsInOrder = true; - for (unsigned i = 0, e = Indices.size(); i != e; ++i) { - JsModuleReference Reference = References[Indices[i]]; - if (appendReference(ReferencesText, Reference)) - SymbolsInOrder = false; - if (i + 1 < e) { - // Insert breaks between imports and exports. - ReferencesText += "\n"; - // Separate imports groups with two line breaks, but keep all exports - // in a single group. - if (!Reference.IsExport && - (Reference.IsExport != References[Indices[i + 1]].IsExport || - Reference.Category != References[Indices[i + 1]].Category)) - ReferencesText += "\n"; - } - } - - if (ReferencesInOrder && SymbolsInOrder) - return {Result, 0}; - - SourceRange InsertionPoint = References[0].Range; - InsertionPoint.setEnd(References[References.size() - 1].Range.getEnd()); - - // The loop above might collapse previously existing line breaks between - // import blocks, and thus shrink the file. SortIncludes must not shrink - // overall source length as there is currently no re-calculation of ranges - // after applying source sorting. - // This loop just backfills trailing spaces after the imports, which are - // harmless and will be stripped by the subsequent formatting pass. - // FIXME: A better long term fix is to re-calculate Ranges after sorting. - unsigned PreviousSize = getSourceText(InsertionPoint).size(); - while (ReferencesText.size() < PreviousSize) { - ReferencesText += " "; - } - - // Separate references from the main code body of the file. - if (FirstNonImportLine && FirstNonImportLine->First->NewlinesBefore < 2) - ReferencesText += "\n"; - - LLVM_DEBUG(llvm::dbgs() << "Replacing imports:\n" - << getSourceText(InsertionPoint) << "\nwith:\n" - << ReferencesText << "\n"); - auto Err = Result.add(tooling::Replacement( - Env.getSourceManager(), CharSourceRange::getCharRange(InsertionPoint), - ReferencesText)); - // FIXME: better error handling. For now, just print error message and skip - // the replacement for the release version. - if (Err) { - llvm::errs() << llvm::toString(std::move(Err)) << "\n"; - assert(false); - } - - return {Result, 0}; - } - -private: - FormatToken *Current; - FormatToken *LineEnd; - - FormatToken invalidToken; - - StringRef FileContents; - - void skipComments() { Current = skipComments(Current); } - - FormatToken *skipComments(FormatToken *Tok) { - while (Tok && Tok->is(tok::comment)) - Tok = Tok->Next; - return Tok; - } - - void nextToken() { - Current = Current->Next; - skipComments(); - if (!Current || Current == LineEnd->Next) { - // Set the current token to an invalid token, so that further parsing on - // this line fails. - invalidToken.Tok.setKind(tok::unknown); - Current = &invalidToken; - } - } - - StringRef getSourceText(SourceRange Range) { - return getSourceText(Range.getBegin(), Range.getEnd()); - } - - StringRef getSourceText(SourceLocation Begin, SourceLocation End) { - const SourceManager &SM = Env.getSourceManager(); - return FileContents.substr(SM.getFileOffset(Begin), - SM.getFileOffset(End) - SM.getFileOffset(Begin)); - } - - // Appends ``Reference`` to ``Buffer``, returning true if text within the - // ``Reference`` changed (e.g. symbol order). - bool appendReference(std::string &Buffer, JsModuleReference &Reference) { - // Sort the individual symbols within the import. - // E.g. `import {b, a} from 'x';` -> `import {a, b} from 'x';` - SmallVector<JsImportedSymbol, 1> Symbols = Reference.Symbols; - std::stable_sort( - Symbols.begin(), Symbols.end(), - [&](const JsImportedSymbol &LHS, const JsImportedSymbol &RHS) { - return LHS.Symbol.compare_lower(RHS.Symbol) < 0; - }); - if (Symbols == Reference.Symbols) { - // No change in symbol order. - StringRef ReferenceStmt = getSourceText(Reference.Range); - Buffer += ReferenceStmt; - return false; - } - // Stitch together the module reference start... - SourceLocation SymbolsStart = Reference.Symbols.front().Range.getBegin(); - SourceLocation SymbolsEnd = Reference.Symbols.back().Range.getEnd(); - Buffer += getSourceText(Reference.Range.getBegin(), SymbolsStart); - // ... then the references in order ... - for (auto I = Symbols.begin(), E = Symbols.end(); I != E; ++I) { - if (I != Symbols.begin()) - Buffer += ","; - Buffer += getSourceText(I->Range); - } - // ... followed by the module reference end. - Buffer += getSourceText(SymbolsEnd, Reference.Range.getEnd()); - return true; - } - - // Parses module references in the given lines. Returns the module references, - // and a pointer to the first "main code" line if that is adjacent to the - // affected lines of module references, nullptr otherwise. - std::pair<SmallVector<JsModuleReference, 16>, AnnotatedLine *> - parseModuleReferences(const AdditionalKeywords &Keywords, - SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { - SmallVector<JsModuleReference, 16> References; - SourceLocation Start; - AnnotatedLine *FirstNonImportLine = nullptr; - bool AnyImportAffected = false; - for (auto Line : AnnotatedLines) { - Current = Line->First; - LineEnd = Line->Last; - skipComments(); - if (Start.isInvalid() || References.empty()) - // After the first file level comment, consider line comments to be part - // of the import that immediately follows them by using the previously - // set Start. - Start = Line->First->Tok.getLocation(); - if (!Current) { - // Only comments on this line. Could be the first non-import line. - FirstNonImportLine = Line; - continue; - } - JsModuleReference Reference; - Reference.Range.setBegin(Start); - if (!parseModuleReference(Keywords, Reference)) { - if (!FirstNonImportLine) - FirstNonImportLine = Line; // if no comment before. - break; - } - FirstNonImportLine = nullptr; - AnyImportAffected = AnyImportAffected || Line->Affected; - Reference.Range.setEnd(LineEnd->Tok.getEndLoc()); - LLVM_DEBUG({ - llvm::dbgs() << "JsModuleReference: {" - << "is_export: " << Reference.IsExport - << ", cat: " << Reference.Category - << ", url: " << Reference.URL - << ", prefix: " << Reference.Prefix; - for (size_t i = 0; i < Reference.Symbols.size(); ++i) - llvm::dbgs() << ", " << Reference.Symbols[i].Symbol << " as " - << Reference.Symbols[i].Alias; - llvm::dbgs() << ", text: " << getSourceText(Reference.Range); - llvm::dbgs() << "}\n"; - }); - References.push_back(Reference); - Start = SourceLocation(); - } - // Sort imports if any import line was affected. - if (!AnyImportAffected) - References.clear(); - return std::make_pair(References, FirstNonImportLine); - } - - // Parses a JavaScript/ECMAScript 6 module reference. - // See http://www.ecma-international.org/ecma-262/6.0/#sec-scripts-and-modules - // for grammar EBNF (production ModuleItem). - bool parseModuleReference(const AdditionalKeywords &Keywords, - JsModuleReference &Reference) { - if (!Current || !Current->isOneOf(Keywords.kw_import, tok::kw_export)) - return false; - Reference.IsExport = Current->is(tok::kw_export); - - nextToken(); - if (Current->isStringLiteral() && !Reference.IsExport) { - // "import 'side-effect';" - Reference.Category = JsModuleReference::ReferenceCategory::SIDE_EFFECT; - Reference.URL = - Current->TokenText.substr(1, Current->TokenText.size() - 2); - return true; - } - - if (!parseModuleBindings(Keywords, Reference)) - return false; - - if (Current->is(Keywords.kw_from)) { - // imports have a 'from' clause, exports might not. - nextToken(); - if (!Current->isStringLiteral()) - return false; - // URL = TokenText without the quotes. - Reference.URL = - Current->TokenText.substr(1, Current->TokenText.size() - 2); - if (Reference.URL.startswith("..")) - Reference.Category = - JsModuleReference::ReferenceCategory::RELATIVE_PARENT; - else if (Reference.URL.startswith(".")) - Reference.Category = JsModuleReference::ReferenceCategory::RELATIVE; - else - Reference.Category = JsModuleReference::ReferenceCategory::ABSOLUTE; - } else { - // w/o URL groups with "empty". - Reference.Category = JsModuleReference::ReferenceCategory::RELATIVE; - } - return true; - } - - bool parseModuleBindings(const AdditionalKeywords &Keywords, - JsModuleReference &Reference) { - if (parseStarBinding(Keywords, Reference)) - return true; - return parseNamedBindings(Keywords, Reference); - } - - bool parseStarBinding(const AdditionalKeywords &Keywords, - JsModuleReference &Reference) { - // * as prefix from '...'; - if (Current->isNot(tok::star)) - return false; - nextToken(); - if (Current->isNot(Keywords.kw_as)) - return false; - nextToken(); - if (Current->isNot(tok::identifier)) - return false; - Reference.Prefix = Current->TokenText; - nextToken(); - return true; - } - - bool parseNamedBindings(const AdditionalKeywords &Keywords, - JsModuleReference &Reference) { - if (Current->is(tok::identifier)) { - nextToken(); - if (Current->is(Keywords.kw_from)) - return true; - if (Current->isNot(tok::comma)) - return false; - nextToken(); // eat comma. - } - if (Current->isNot(tok::l_brace)) - return false; - - // {sym as alias, sym2 as ...} from '...'; - while (Current->isNot(tok::r_brace)) { - nextToken(); - if (Current->is(tok::r_brace)) - break; - if (!Current->isOneOf(tok::identifier, tok::kw_default)) - return false; - - JsImportedSymbol Symbol; - Symbol.Symbol = Current->TokenText; - // Make sure to include any preceding comments. - Symbol.Range.setBegin( - Current->getPreviousNonComment()->Next->WhitespaceRange.getBegin()); - nextToken(); - - if (Current->is(Keywords.kw_as)) { - nextToken(); - if (!Current->isOneOf(tok::identifier, tok::kw_default)) - return false; - Symbol.Alias = Current->TokenText; - nextToken(); - } - Symbol.Range.setEnd(Current->Tok.getLocation()); - Reference.Symbols.push_back(Symbol); - - if (!Current->isOneOf(tok::r_brace, tok::comma)) - return false; - } - nextToken(); // consume r_brace - return true; - } -}; - -tooling::Replacements sortJavaScriptImports(const FormatStyle &Style, - StringRef Code, - ArrayRef<tooling::Range> Ranges, - StringRef FileName) { - // FIXME: Cursor support. - return JavaScriptImportSorter(Environment(Code, FileName, Ranges), Style) - .process() - .first; -} - -} // end namespace format -} // end namespace clang diff --git a/gnu/llvm/tools/clang/lib/Format/SortJavaScriptImports.h b/gnu/llvm/tools/clang/lib/Format/SortJavaScriptImports.h deleted file mode 100644 index ecab0ae54cb..00000000000 --- a/gnu/llvm/tools/clang/lib/Format/SortJavaScriptImports.h +++ /dev/null @@ -1,36 +0,0 @@ -//===--- SortJavaScriptImports.h - Sort ES6 Imports -------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file implements a sorter for JavaScript ES6 imports. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LIB_FORMAT_SORTJAVASCRIPTIMPORTS_H -#define LLVM_CLANG_LIB_FORMAT_SORTJAVASCRIPTIMPORTS_H - -#include "clang/Basic/LLVM.h" -#include "clang/Format/Format.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringRef.h" - -namespace clang { -namespace format { - -// Sort JavaScript ES6 imports/exports in ``Code``. The generated replacements -// only monotonically increase the length of the given code. -tooling::Replacements sortJavaScriptImports(const FormatStyle &Style, - StringRef Code, - ArrayRef<tooling::Range> Ranges, - StringRef FileName); - -} // end namespace format -} // end namespace clang - -#endif diff --git a/gnu/llvm/tools/clang/lib/Format/TokenAnalyzer.cpp b/gnu/llvm/tools/clang/lib/Format/TokenAnalyzer.cpp deleted file mode 100644 index 99fc61ef1c3..00000000000 --- a/gnu/llvm/tools/clang/lib/Format/TokenAnalyzer.cpp +++ /dev/null @@ -1,125 +0,0 @@ -//===--- TokenAnalyzer.cpp - Analyze Token Streams --------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file implements an abstract TokenAnalyzer and associated helper -/// classes. TokenAnalyzer can be extended to generate replacements based on -/// an annotated and pre-processed token stream. -/// -//===----------------------------------------------------------------------===// - -#include "TokenAnalyzer.h" -#include "AffectedRangeManager.h" -#include "Encoding.h" -#include "FormatToken.h" -#include "FormatTokenLexer.h" -#include "TokenAnnotator.h" -#include "UnwrappedLineParser.h" -#include "clang/Basic/Diagnostic.h" -#include "clang/Basic/DiagnosticOptions.h" -#include "clang/Basic/FileManager.h" -#include "clang/Basic/SourceManager.h" -#include "clang/Format/Format.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "format-formatter" - -namespace clang { -namespace format { - -Environment::Environment(StringRef Code, StringRef FileName, - ArrayRef<tooling::Range> Ranges, - unsigned FirstStartColumn, unsigned NextStartColumn, - unsigned LastStartColumn) - : VirtualSM(new SourceManagerForFile(FileName, Code)), SM(VirtualSM->get()), - ID(VirtualSM->get().getMainFileID()), FirstStartColumn(FirstStartColumn), - NextStartColumn(NextStartColumn), LastStartColumn(LastStartColumn) { - SourceLocation StartOfFile = SM.getLocForStartOfFile(ID); - for (const tooling::Range &Range : Ranges) { - SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset()); - SourceLocation End = Start.getLocWithOffset(Range.getLength()); - CharRanges.push_back(CharSourceRange::getCharRange(Start, End)); - } -} - -TokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style) - : Style(Style), Env(Env), - AffectedRangeMgr(Env.getSourceManager(), Env.getCharRanges()), - UnwrappedLines(1), - Encoding(encoding::detectEncoding( - Env.getSourceManager().getBufferData(Env.getFileID()))) { - LLVM_DEBUG( - llvm::dbgs() << "File encoding: " - << (Encoding == encoding::Encoding_UTF8 ? "UTF8" : "unknown") - << "\n"); - LLVM_DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language) - << "\n"); -} - -std::pair<tooling::Replacements, unsigned> TokenAnalyzer::process() { - tooling::Replacements Result; - FormatTokenLexer Tokens(Env.getSourceManager(), Env.getFileID(), - Env.getFirstStartColumn(), Style, Encoding); - - UnwrappedLineParser Parser(Style, Tokens.getKeywords(), - Env.getFirstStartColumn(), Tokens.lex(), *this); - Parser.parse(); - assert(UnwrappedLines.rbegin()->empty()); - unsigned Penalty = 0; - for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; ++Run) { - LLVM_DEBUG(llvm::dbgs() << "Run " << Run << "...\n"); - SmallVector<AnnotatedLine *, 16> AnnotatedLines; - - TokenAnnotator Annotator(Style, Tokens.getKeywords()); - for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) { - AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i])); - Annotator.annotate(*AnnotatedLines.back()); - } - - std::pair<tooling::Replacements, unsigned> RunResult = - analyze(Annotator, AnnotatedLines, Tokens); - - LLVM_DEBUG({ - llvm::dbgs() << "Replacements for run " << Run << ":\n"; - for (tooling::Replacements::const_iterator I = RunResult.first.begin(), - E = RunResult.first.end(); - I != E; ++I) { - llvm::dbgs() << I->toString() << "\n"; - } - }); - for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { - delete AnnotatedLines[i]; - } - - Penalty += RunResult.second; - for (const auto &R : RunResult.first) { - auto Err = Result.add(R); - // FIXME: better error handling here. For now, simply return an empty - // Replacements to indicate failure. - if (Err) { - llvm::errs() << llvm::toString(std::move(Err)) << "\n"; - return {tooling::Replacements(), 0}; - } - } - } - return {Result, Penalty}; -} - -void TokenAnalyzer::consumeUnwrappedLine(const UnwrappedLine &TheLine) { - assert(!UnwrappedLines.empty()); - UnwrappedLines.back().push_back(TheLine); -} - -void TokenAnalyzer::finishRun() { - UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>()); -} - -} // end namespace format -} // end namespace clang diff --git a/gnu/llvm/tools/clang/lib/Format/TokenAnalyzer.h b/gnu/llvm/tools/clang/lib/Format/TokenAnalyzer.h deleted file mode 100644 index e43a860e46c..00000000000 --- a/gnu/llvm/tools/clang/lib/Format/TokenAnalyzer.h +++ /dev/null @@ -1,113 +0,0 @@ -//===--- TokenAnalyzer.h - Analyze Token Streams ----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file declares an abstract TokenAnalyzer, and associated helper -/// classes. TokenAnalyzer can be extended to generate replacements based on -/// an annotated and pre-processed token stream. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LIB_FORMAT_TOKENANALYZER_H -#define LLVM_CLANG_LIB_FORMAT_TOKENANALYZER_H - -#include "AffectedRangeManager.h" -#include "Encoding.h" -#include "FormatToken.h" -#include "FormatTokenLexer.h" -#include "TokenAnnotator.h" -#include "UnwrappedLineParser.h" -#include "clang/Basic/Diagnostic.h" -#include "clang/Basic/DiagnosticOptions.h" -#include "clang/Basic/FileManager.h" -#include "clang/Basic/SourceManager.h" -#include "clang/Format/Format.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Debug.h" - -namespace clang { -namespace format { - -class Environment { -public: - Environment(SourceManager &SM, FileID ID, ArrayRef<CharSourceRange> Ranges) - : SM(SM), ID(ID), CharRanges(Ranges.begin(), Ranges.end()), - FirstStartColumn(0), NextStartColumn(0), LastStartColumn(0) {} - - // This sets up an virtual file system with file \p FileName containing the - // fragment \p Code. Assumes that \p Code starts at \p FirstStartColumn, - // that the next lines of \p Code should start at \p NextStartColumn, and - // that \p Code should end at \p LastStartColumn if it ends in newline. - // See also the documentation of clang::format::internal::reformat. - Environment(StringRef Code, StringRef FileName, - ArrayRef<tooling::Range> Ranges, unsigned FirstStartColumn = 0, - unsigned NextStartColumn = 0, unsigned LastStartColumn = 0); - - FileID getFileID() const { return ID; } - - const SourceManager &getSourceManager() const { return SM; } - - ArrayRef<CharSourceRange> getCharRanges() const { return CharRanges; } - - // Returns the column at which the fragment of code managed by this - // environment starts. - unsigned getFirstStartColumn() const { return FirstStartColumn; } - - // Returns the column at which subsequent lines of the fragment of code - // managed by this environment should start. - unsigned getNextStartColumn() const { return NextStartColumn; } - - // Returns the column at which the fragment of code managed by this - // environment should end if it ends in a newline. - unsigned getLastStartColumn() const { return LastStartColumn; } - -private: - // This is only set if constructed from string. - std::unique_ptr<SourceManagerForFile> VirtualSM; - - // This refers to either a SourceManager provided by users or VirtualSM - // created for a single file. - SourceManager &SM; - FileID ID; - - SmallVector<CharSourceRange, 8> CharRanges; - unsigned FirstStartColumn; - unsigned NextStartColumn; - unsigned LastStartColumn; -}; - -class TokenAnalyzer : public UnwrappedLineConsumer { -public: - TokenAnalyzer(const Environment &Env, const FormatStyle &Style); - - std::pair<tooling::Replacements, unsigned> process(); - -protected: - virtual std::pair<tooling::Replacements, unsigned> - analyze(TokenAnnotator &Annotator, - SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, - FormatTokenLexer &Tokens) = 0; - - void consumeUnwrappedLine(const UnwrappedLine &TheLine) override; - - void finishRun() override; - - FormatStyle Style; - // Stores Style, FileID and SourceManager etc. - const Environment &Env; - // AffectedRangeMgr stores ranges to be fixed. - AffectedRangeManager AffectedRangeMgr; - SmallVector<SmallVector<UnwrappedLine, 16>, 2> UnwrappedLines; - encoding::Encoding Encoding; -}; - -} // end namespace format -} // end namespace clang - -#endif diff --git a/gnu/llvm/tools/clang/lib/Format/TokenAnnotator.cpp b/gnu/llvm/tools/clang/lib/Format/TokenAnnotator.cpp deleted file mode 100644 index 24c2f998c38..00000000000 --- a/gnu/llvm/tools/clang/lib/Format/TokenAnnotator.cpp +++ /dev/null @@ -1,3379 +0,0 @@ -//===--- TokenAnnotator.cpp - Format C++ code -----------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file implements a token annotator, i.e. creates -/// \c AnnotatedTokens out of \c FormatTokens with required extra information. -/// -//===----------------------------------------------------------------------===// - -#include "TokenAnnotator.h" -#include "clang/Basic/SourceManager.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Support/Debug.h" - -#define DEBUG_TYPE "format-token-annotator" - -namespace clang { -namespace format { - -namespace { - -/// Returns \c true if the token can be used as an identifier in -/// an Objective-C \c @selector, \c false otherwise. -/// -/// Because getFormattingLangOpts() always lexes source code as -/// Objective-C++, C++ keywords like \c new and \c delete are -/// lexed as tok::kw_*, not tok::identifier, even for Objective-C. -/// -/// For Objective-C and Objective-C++, both identifiers and keywords -/// are valid inside @selector(...) (or a macro which -/// invokes @selector(...)). So, we allow treat any identifier or -/// keyword as a potential Objective-C selector component. -static bool canBeObjCSelectorComponent(const FormatToken &Tok) { - return Tok.Tok.getIdentifierInfo() != nullptr; -} - -/// A parser that gathers additional information about tokens. -/// -/// The \c TokenAnnotator tries to match parenthesis and square brakets and -/// store a parenthesis levels. It also tries to resolve matching "<" and ">" -/// into template parameter lists. -class AnnotatingParser { -public: - AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line, - const AdditionalKeywords &Keywords) - : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false), - Keywords(Keywords) { - Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false)); - resetTokenMetadata(CurrentToken); - } - -private: - bool parseAngle() { - if (!CurrentToken || !CurrentToken->Previous) - return false; - if (NonTemplateLess.count(CurrentToken->Previous)) - return false; - - const FormatToken &Previous = *CurrentToken->Previous; // The '<'. - if (Previous.Previous) { - if (Previous.Previous->Tok.isLiteral()) - return false; - if (Previous.Previous->is(tok::r_paren) && Contexts.size() > 1 && - (!Previous.Previous->MatchingParen || - !Previous.Previous->MatchingParen->is(TT_OverloadedOperatorLParen))) - return false; - } - - FormatToken *Left = CurrentToken->Previous; - Left->ParentBracket = Contexts.back().ContextKind; - ScopedContextCreator ContextCreator(*this, tok::less, 12); - - // If this angle is in the context of an expression, we need to be more - // hesitant to detect it as opening template parameters. - bool InExprContext = Contexts.back().IsExpression; - - Contexts.back().IsExpression = false; - // If there's a template keyword before the opening angle bracket, this is a - // template parameter, not an argument. - Contexts.back().InTemplateArgument = - Left->Previous && Left->Previous->Tok.isNot(tok::kw_template); - - if (Style.Language == FormatStyle::LK_Java && - CurrentToken->is(tok::question)) - next(); - - while (CurrentToken) { - if (CurrentToken->is(tok::greater)) { - Left->MatchingParen = CurrentToken; - CurrentToken->MatchingParen = Left; - // In TT_Proto, we must distignuish between: - // map<key, value> - // msg < item: data > - // msg: < item: data > - // In TT_TextProto, map<key, value> does not occur. - if (Style.Language == FormatStyle::LK_TextProto || - (Style.Language == FormatStyle::LK_Proto && Left->Previous && - Left->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) - CurrentToken->Type = TT_DictLiteral; - else - CurrentToken->Type = TT_TemplateCloser; - next(); - return true; - } - if (CurrentToken->is(tok::question) && - Style.Language == FormatStyle::LK_Java) { - next(); - continue; - } - if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace) || - (CurrentToken->isOneOf(tok::colon, tok::question) && InExprContext && - Style.Language != FormatStyle::LK_Proto && - Style.Language != FormatStyle::LK_TextProto)) - return false; - // If a && or || is found and interpreted as a binary operator, this set - // of angles is likely part of something like "a < b && c > d". If the - // angles are inside an expression, the ||/&& might also be a binary - // operator that was misinterpreted because we are parsing template - // parameters. - // FIXME: This is getting out of hand, write a decent parser. - if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) && - CurrentToken->Previous->is(TT_BinaryOperator) && - Contexts[Contexts.size() - 2].IsExpression && - !Line.startsWith(tok::kw_template)) - return false; - updateParameterCount(Left, CurrentToken); - if (Style.Language == FormatStyle::LK_Proto) { - if (FormatToken *Previous = CurrentToken->getPreviousNonComment()) { - if (CurrentToken->is(tok::colon) || - (CurrentToken->isOneOf(tok::l_brace, tok::less) && - Previous->isNot(tok::colon))) - Previous->Type = TT_SelectorName; - } - } - if (!consumeToken()) - return false; - } - return false; - } - - bool parseParens(bool LookForDecls = false) { - if (!CurrentToken) - return false; - FormatToken *Left = CurrentToken->Previous; - Left->ParentBracket = Contexts.back().ContextKind; - ScopedContextCreator ContextCreator(*this, tok::l_paren, 1); - - // FIXME: This is a bit of a hack. Do better. - Contexts.back().ColonIsForRangeExpr = - Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr; - - bool StartsObjCMethodExpr = false; - if (FormatToken *MaybeSel = Left->Previous) { - // @selector( starts a selector. - if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous && - MaybeSel->Previous->is(tok::at)) { - StartsObjCMethodExpr = true; - } - } - - if (Left->is(TT_OverloadedOperatorLParen)) { - Contexts.back().IsExpression = false; - } else if (Style.Language == FormatStyle::LK_JavaScript && - (Line.startsWith(Keywords.kw_type, tok::identifier) || - Line.startsWith(tok::kw_export, Keywords.kw_type, - tok::identifier))) { - // type X = (...); - // export type X = (...); - Contexts.back().IsExpression = false; - } else if (Left->Previous && - (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_decltype, - tok::kw_if, tok::kw_while, tok::l_paren, - tok::comma) || - Left->Previous->endsSequence(tok::kw_constexpr, tok::kw_if) || - Left->Previous->is(TT_BinaryOperator))) { - // static_assert, if and while usually contain expressions. - Contexts.back().IsExpression = true; - } else if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous && - (Left->Previous->is(Keywords.kw_function) || - (Left->Previous->endsSequence(tok::identifier, - Keywords.kw_function)))) { - // function(...) or function f(...) - Contexts.back().IsExpression = false; - } else if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous && - Left->Previous->is(TT_JsTypeColon)) { - // let x: (SomeType); - Contexts.back().IsExpression = false; - } else if (Left->Previous && Left->Previous->is(tok::r_square) && - Left->Previous->MatchingParen && - Left->Previous->MatchingParen->is(TT_LambdaLSquare)) { - // This is a parameter list of a lambda expression. - Contexts.back().IsExpression = false; - } else if (Line.InPPDirective && - (!Left->Previous || !Left->Previous->is(tok::identifier))) { - Contexts.back().IsExpression = true; - } else if (Contexts[Contexts.size() - 2].CaretFound) { - // This is the parameter list of an ObjC block. - Contexts.back().IsExpression = false; - } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) { - Left->Type = TT_AttributeParen; - } else if (Left->Previous && Left->Previous->is(TT_ForEachMacro)) { - // The first argument to a foreach macro is a declaration. - Contexts.back().IsForEachMacro = true; - Contexts.back().IsExpression = false; - } else if (Left->Previous && Left->Previous->MatchingParen && - Left->Previous->MatchingParen->is(TT_ObjCBlockLParen)) { - Contexts.back().IsExpression = false; - } else if (!Line.MustBeDeclaration && !Line.InPPDirective) { - bool IsForOrCatch = - Left->Previous && Left->Previous->isOneOf(tok::kw_for, tok::kw_catch); - Contexts.back().IsExpression = !IsForOrCatch; - } - - if (StartsObjCMethodExpr) { - Contexts.back().ColonIsObjCMethodExpr = true; - Left->Type = TT_ObjCMethodExpr; - } - - // MightBeFunctionType and ProbablyFunctionType are used for - // function pointer and reference types as well as Objective-C - // block types: - // - // void (*FunctionPointer)(void); - // void (&FunctionReference)(void); - // void (^ObjCBlock)(void); - bool MightBeFunctionType = !Contexts[Contexts.size() - 2].IsExpression; - bool ProbablyFunctionType = - CurrentToken->isOneOf(tok::star, tok::amp, tok::caret); - bool HasMultipleLines = false; - bool HasMultipleParametersOnALine = false; - bool MightBeObjCForRangeLoop = - Left->Previous && Left->Previous->is(tok::kw_for); - FormatToken *PossibleObjCForInToken = nullptr; - while (CurrentToken) { - // LookForDecls is set when "if (" has been seen. Check for - // 'identifier' '*' 'identifier' followed by not '=' -- this - // '*' has to be a binary operator but determineStarAmpUsage() will - // categorize it as an unary operator, so set the right type here. - if (LookForDecls && CurrentToken->Next) { - FormatToken *Prev = CurrentToken->getPreviousNonComment(); - if (Prev) { - FormatToken *PrevPrev = Prev->getPreviousNonComment(); - FormatToken *Next = CurrentToken->Next; - if (PrevPrev && PrevPrev->is(tok::identifier) && - Prev->isOneOf(tok::star, tok::amp, tok::ampamp) && - CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) { - Prev->Type = TT_BinaryOperator; - LookForDecls = false; - } - } - } - - if (CurrentToken->Previous->is(TT_PointerOrReference) && - CurrentToken->Previous->Previous->isOneOf(tok::l_paren, - tok::coloncolon)) - ProbablyFunctionType = true; - if (CurrentToken->is(tok::comma)) - MightBeFunctionType = false; - if (CurrentToken->Previous->is(TT_BinaryOperator)) - Contexts.back().IsExpression = true; - if (CurrentToken->is(tok::r_paren)) { - if (MightBeFunctionType && ProbablyFunctionType && CurrentToken->Next && - (CurrentToken->Next->is(tok::l_paren) || - (CurrentToken->Next->is(tok::l_square) && Line.MustBeDeclaration))) - Left->Type = Left->Next->is(tok::caret) ? TT_ObjCBlockLParen - : TT_FunctionTypeLParen; - Left->MatchingParen = CurrentToken; - CurrentToken->MatchingParen = Left; - - if (CurrentToken->Next && CurrentToken->Next->is(tok::l_brace) && - Left->Previous && Left->Previous->is(tok::l_paren)) { - // Detect the case where macros are used to generate lambdas or - // function bodies, e.g.: - // auto my_lambda = MARCO((Type *type, int i) { .. body .. }); - for (FormatToken *Tok = Left; Tok != CurrentToken; Tok = Tok->Next) { - if (Tok->is(TT_BinaryOperator) && - Tok->isOneOf(tok::star, tok::amp, tok::ampamp)) - Tok->Type = TT_PointerOrReference; - } - } - - if (StartsObjCMethodExpr) { - CurrentToken->Type = TT_ObjCMethodExpr; - if (Contexts.back().FirstObjCSelectorName) { - Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = - Contexts.back().LongestObjCSelectorName; - } - } - - if (Left->is(TT_AttributeParen)) - CurrentToken->Type = TT_AttributeParen; - if (Left->Previous && Left->Previous->is(TT_JavaAnnotation)) - CurrentToken->Type = TT_JavaAnnotation; - if (Left->Previous && Left->Previous->is(TT_LeadingJavaAnnotation)) - CurrentToken->Type = TT_LeadingJavaAnnotation; - - if (!HasMultipleLines) - Left->PackingKind = PPK_Inconclusive; - else if (HasMultipleParametersOnALine) - Left->PackingKind = PPK_BinPacked; - else - Left->PackingKind = PPK_OnePerLine; - - next(); - return true; - } - if (CurrentToken->isOneOf(tok::r_square, tok::r_brace)) - return false; - - if (CurrentToken->is(tok::l_brace)) - Left->Type = TT_Unknown; // Not TT_ObjCBlockLParen - if (CurrentToken->is(tok::comma) && CurrentToken->Next && - !CurrentToken->Next->HasUnescapedNewline && - !CurrentToken->Next->isTrailingComment()) - HasMultipleParametersOnALine = true; - if ((CurrentToken->Previous->isOneOf(tok::kw_const, tok::kw_auto) || - CurrentToken->Previous->isSimpleTypeSpecifier()) && - !CurrentToken->is(tok::l_brace)) - Contexts.back().IsExpression = false; - if (CurrentToken->isOneOf(tok::semi, tok::colon)) { - MightBeObjCForRangeLoop = false; - if (PossibleObjCForInToken) { - PossibleObjCForInToken->Type = TT_Unknown; - PossibleObjCForInToken = nullptr; - } - } - if (MightBeObjCForRangeLoop && CurrentToken->is(Keywords.kw_in)) { - PossibleObjCForInToken = CurrentToken; - PossibleObjCForInToken->Type = TT_ObjCForIn; - } - // When we discover a 'new', we set CanBeExpression to 'false' in order to - // parse the type correctly. Reset that after a comma. - if (CurrentToken->is(tok::comma)) - Contexts.back().CanBeExpression = true; - - FormatToken *Tok = CurrentToken; - if (!consumeToken()) - return false; - updateParameterCount(Left, Tok); - if (CurrentToken && CurrentToken->HasUnescapedNewline) - HasMultipleLines = true; - } - return false; - } - - bool isCpp11AttributeSpecifier(const FormatToken &Tok) { - if (!Style.isCpp() || !Tok.startsSequence(tok::l_square, tok::l_square)) - return false; - const FormatToken *AttrTok = Tok.Next->Next; - if (!AttrTok) - return false; - // C++17 '[[using ns: foo, bar(baz, blech)]]' - // We assume nobody will name an ObjC variable 'using'. - if (AttrTok->startsSequence(tok::kw_using, tok::identifier, tok::colon)) - return true; - if (AttrTok->isNot(tok::identifier)) - return false; - while (AttrTok && !AttrTok->startsSequence(tok::r_square, tok::r_square)) { - // ObjC message send. We assume nobody will use : in a C++11 attribute - // specifier parameter, although this is technically valid: - // [[foo(:)]] - if (AttrTok->is(tok::colon) || - AttrTok->startsSequence(tok::identifier, tok::identifier) || - AttrTok->startsSequence(tok::r_paren, tok::identifier)) - return false; - if (AttrTok->is(tok::ellipsis)) - return true; - AttrTok = AttrTok->Next; - } - return AttrTok && AttrTok->startsSequence(tok::r_square, tok::r_square); - } - - bool parseSquare() { - if (!CurrentToken) - return false; - - // A '[' could be an index subscript (after an identifier or after - // ')' or ']'), it could be the start of an Objective-C method - // expression, it could the start of an Objective-C array literal, - // or it could be a C++ attribute specifier [[foo::bar]]. - FormatToken *Left = CurrentToken->Previous; - Left->ParentBracket = Contexts.back().ContextKind; - FormatToken *Parent = Left->getPreviousNonComment(); - - // Cases where '>' is followed by '['. - // In C++, this can happen either in array of templates (foo<int>[10]) - // or when array is a nested template type (unique_ptr<type1<type2>[]>). - bool CppArrayTemplates = - Style.isCpp() && Parent && Parent->is(TT_TemplateCloser) && - (Contexts.back().CanBeExpression || Contexts.back().IsExpression || - Contexts.back().InTemplateArgument); - - bool IsCpp11AttributeSpecifier = isCpp11AttributeSpecifier(*Left) || - Contexts.back().InCpp11AttributeSpecifier; - - bool InsideInlineASM = Line.startsWith(tok::kw_asm); - bool StartsObjCMethodExpr = - !InsideInlineASM && !CppArrayTemplates && Style.isCpp() && - !IsCpp11AttributeSpecifier && Contexts.back().CanBeExpression && - Left->isNot(TT_LambdaLSquare) && - !CurrentToken->isOneOf(tok::l_brace, tok::r_square) && - (!Parent || - Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren, - tok::kw_return, tok::kw_throw) || - Parent->isUnaryOperator() || - // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen. - Parent->isOneOf(TT_ObjCForIn, TT_CastRParen) || - getBinOpPrecedence(Parent->Tok.getKind(), true, true) > prec::Unknown); - bool ColonFound = false; - - unsigned BindingIncrease = 1; - if (Left->isCppStructuredBinding(Style)) { - Left->Type = TT_StructuredBindingLSquare; - } else if (Left->is(TT_Unknown)) { - if (StartsObjCMethodExpr) { - Left->Type = TT_ObjCMethodExpr; - } else if (IsCpp11AttributeSpecifier) { - Left->Type = TT_AttributeSquare; - } else if (Style.Language == FormatStyle::LK_JavaScript && Parent && - Contexts.back().ContextKind == tok::l_brace && - Parent->isOneOf(tok::l_brace, tok::comma)) { - Left->Type = TT_JsComputedPropertyName; - } else if (Style.isCpp() && Contexts.back().ContextKind == tok::l_brace && - Parent && Parent->isOneOf(tok::l_brace, tok::comma)) { - Left->Type = TT_DesignatedInitializerLSquare; - } else if (CurrentToken->is(tok::r_square) && Parent && - Parent->is(TT_TemplateCloser)) { - Left->Type = TT_ArraySubscriptLSquare; - } else if (Style.Language == FormatStyle::LK_Proto || - Style.Language == FormatStyle::LK_TextProto) { - // Square braces in LK_Proto can either be message field attributes: - // - // optional Aaa aaa = 1 [ - // (aaa) = aaa - // ]; - // - // extensions 123 [ - // (aaa) = aaa - // ]; - // - // or text proto extensions (in options): - // - // option (Aaa.options) = { - // [type.type/type] { - // key: value - // } - // } - // - // or repeated fields (in options): - // - // option (Aaa.options) = { - // keys: [ 1, 2, 3 ] - // } - // - // In the first and the third case we want to spread the contents inside - // the square braces; in the second we want to keep them inline. - Left->Type = TT_ArrayInitializerLSquare; - if (!Left->endsSequence(tok::l_square, tok::numeric_constant, - tok::equal) && - !Left->endsSequence(tok::l_square, tok::numeric_constant, - tok::identifier) && - !Left->endsSequence(tok::l_square, tok::colon, TT_SelectorName)) { - Left->Type = TT_ProtoExtensionLSquare; - BindingIncrease = 10; - } - } else if (!CppArrayTemplates && Parent && - Parent->isOneOf(TT_BinaryOperator, TT_TemplateCloser, tok::at, - tok::comma, tok::l_paren, tok::l_square, - tok::question, tok::colon, tok::kw_return, - // Should only be relevant to JavaScript: - tok::kw_default)) { - Left->Type = TT_ArrayInitializerLSquare; - } else { - BindingIncrease = 10; - Left->Type = TT_ArraySubscriptLSquare; - } - } - - ScopedContextCreator ContextCreator(*this, tok::l_square, BindingIncrease); - Contexts.back().IsExpression = true; - if (Style.Language == FormatStyle::LK_JavaScript && Parent && - Parent->is(TT_JsTypeColon)) - Contexts.back().IsExpression = false; - - Contexts.back().ColonIsObjCMethodExpr = StartsObjCMethodExpr; - Contexts.back().InCpp11AttributeSpecifier = IsCpp11AttributeSpecifier; - - while (CurrentToken) { - if (CurrentToken->is(tok::r_square)) { - if (IsCpp11AttributeSpecifier) - CurrentToken->Type = TT_AttributeSquare; - else if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) && - Left->is(TT_ObjCMethodExpr)) { - // An ObjC method call is rarely followed by an open parenthesis. - // FIXME: Do we incorrectly label ":" with this? - StartsObjCMethodExpr = false; - Left->Type = TT_Unknown; - } - if (StartsObjCMethodExpr && CurrentToken->Previous != Left) { - CurrentToken->Type = TT_ObjCMethodExpr; - // If we haven't seen a colon yet, make sure the last identifier - // before the r_square is tagged as a selector name component. - if (!ColonFound && CurrentToken->Previous && - CurrentToken->Previous->is(TT_Unknown) && - canBeObjCSelectorComponent(*CurrentToken->Previous)) - CurrentToken->Previous->Type = TT_SelectorName; - // determineStarAmpUsage() thinks that '*' '[' is allocating an - // array of pointers, but if '[' starts a selector then '*' is a - // binary operator. - if (Parent && Parent->is(TT_PointerOrReference)) - Parent->Type = TT_BinaryOperator; - } - Left->MatchingParen = CurrentToken; - CurrentToken->MatchingParen = Left; - // FirstObjCSelectorName is set when a colon is found. This does - // not work, however, when the method has no parameters. - // Here, we set FirstObjCSelectorName when the end of the method call is - // reached, in case it was not set already. - if (!Contexts.back().FirstObjCSelectorName) { - FormatToken* Previous = CurrentToken->getPreviousNonComment(); - if (Previous && Previous->is(TT_SelectorName)) { - Previous->ObjCSelectorNameParts = 1; - Contexts.back().FirstObjCSelectorName = Previous; - } - } else { - Left->ParameterCount = - Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts; - } - if (Contexts.back().FirstObjCSelectorName) { - Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = - Contexts.back().LongestObjCSelectorName; - if (Left->BlockParameterCount > 1) - Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0; - } - next(); - return true; - } - if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace)) - return false; - if (CurrentToken->is(tok::colon)) { - if (IsCpp11AttributeSpecifier && - CurrentToken->endsSequence(tok::colon, tok::identifier, - tok::kw_using)) { - // Remember that this is a [[using ns: foo]] C++ attribute, so we - // don't add a space before the colon (unlike other colons). - CurrentToken->Type = TT_AttributeColon; - } else if (Left->isOneOf(TT_ArraySubscriptLSquare, - TT_DesignatedInitializerLSquare)) { - Left->Type = TT_ObjCMethodExpr; - StartsObjCMethodExpr = true; - Contexts.back().ColonIsObjCMethodExpr = true; - if (Parent && Parent->is(tok::r_paren)) - // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen. - Parent->Type = TT_CastRParen; - } - ColonFound = true; - } - if (CurrentToken->is(tok::comma) && Left->is(TT_ObjCMethodExpr) && - !ColonFound) - Left->Type = TT_ArrayInitializerLSquare; - FormatToken *Tok = CurrentToken; - if (!consumeToken()) - return false; - updateParameterCount(Left, Tok); - } - return false; - } - - bool parseBrace() { - if (CurrentToken) { - FormatToken *Left = CurrentToken->Previous; - Left->ParentBracket = Contexts.back().ContextKind; - - if (Contexts.back().CaretFound) - Left->Type = TT_ObjCBlockLBrace; - Contexts.back().CaretFound = false; - - ScopedContextCreator ContextCreator(*this, tok::l_brace, 1); - Contexts.back().ColonIsDictLiteral = true; - if (Left->BlockKind == BK_BracedInit) - Contexts.back().IsExpression = true; - if (Style.Language == FormatStyle::LK_JavaScript && Left->Previous && - Left->Previous->is(TT_JsTypeColon)) - Contexts.back().IsExpression = false; - - while (CurrentToken) { - if (CurrentToken->is(tok::r_brace)) { - Left->MatchingParen = CurrentToken; - CurrentToken->MatchingParen = Left; - next(); - return true; - } - if (CurrentToken->isOneOf(tok::r_paren, tok::r_square)) - return false; - updateParameterCount(Left, CurrentToken); - if (CurrentToken->isOneOf(tok::colon, tok::l_brace, tok::less)) { - FormatToken *Previous = CurrentToken->getPreviousNonComment(); - if (Previous->is(TT_JsTypeOptionalQuestion)) - Previous = Previous->getPreviousNonComment(); - if ((CurrentToken->is(tok::colon) && - (!Contexts.back().ColonIsDictLiteral || !Style.isCpp())) || - Style.Language == FormatStyle::LK_Proto || - Style.Language == FormatStyle::LK_TextProto) { - Left->Type = TT_DictLiteral; - if (Previous->Tok.getIdentifierInfo() || - Previous->is(tok::string_literal)) - Previous->Type = TT_SelectorName; - } - if (CurrentToken->is(tok::colon) || - Style.Language == FormatStyle::LK_JavaScript) - Left->Type = TT_DictLiteral; - } - if (CurrentToken->is(tok::comma) && - Style.Language == FormatStyle::LK_JavaScript) - Left->Type = TT_DictLiteral; - if (!consumeToken()) - return false; - } - } - return true; - } - - void updateParameterCount(FormatToken *Left, FormatToken *Current) { - // For ObjC methods, the number of parameters is calculated differently as - // method declarations have a different structure (the parameters are not - // inside a bracket scope). - if (Current->is(tok::l_brace) && Current->BlockKind == BK_Block) - ++Left->BlockParameterCount; - if (Current->is(tok::comma)) { - ++Left->ParameterCount; - if (!Left->Role) - Left->Role.reset(new CommaSeparatedList(Style)); - Left->Role->CommaFound(Current); - } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) { - Left->ParameterCount = 1; - } - } - - bool parseConditional() { - while (CurrentToken) { - if (CurrentToken->is(tok::colon)) { - CurrentToken->Type = TT_ConditionalExpr; - next(); - return true; - } - if (!consumeToken()) - return false; - } - return false; - } - - bool parseTemplateDeclaration() { - if (CurrentToken && CurrentToken->is(tok::less)) { - CurrentToken->Type = TT_TemplateOpener; - next(); - if (!parseAngle()) - return false; - if (CurrentToken) - CurrentToken->Previous->ClosesTemplateDeclaration = true; - return true; - } - return false; - } - - bool consumeToken() { - FormatToken *Tok = CurrentToken; - next(); - switch (Tok->Tok.getKind()) { - case tok::plus: - case tok::minus: - if (!Tok->Previous && Line.MustBeDeclaration) - Tok->Type = TT_ObjCMethodSpecifier; - break; - case tok::colon: - if (!Tok->Previous) - return false; - // Colons from ?: are handled in parseConditional(). - if (Style.Language == FormatStyle::LK_JavaScript) { - if (Contexts.back().ColonIsForRangeExpr || // colon in for loop - (Contexts.size() == 1 && // switch/case labels - !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) || - Contexts.back().ContextKind == tok::l_paren || // function params - Contexts.back().ContextKind == tok::l_square || // array type - (!Contexts.back().IsExpression && - Contexts.back().ContextKind == tok::l_brace) || // object type - (Contexts.size() == 1 && - Line.MustBeDeclaration)) { // method/property declaration - Contexts.back().IsExpression = false; - Tok->Type = TT_JsTypeColon; - break; - } - } - if (Contexts.back().ColonIsDictLiteral || - Style.Language == FormatStyle::LK_Proto || - Style.Language == FormatStyle::LK_TextProto) { - Tok->Type = TT_DictLiteral; - if (Style.Language == FormatStyle::LK_TextProto) { - if (FormatToken *Previous = Tok->getPreviousNonComment()) - Previous->Type = TT_SelectorName; - } - } else if (Contexts.back().ColonIsObjCMethodExpr || - Line.startsWith(TT_ObjCMethodSpecifier)) { - Tok->Type = TT_ObjCMethodExpr; - const FormatToken *BeforePrevious = Tok->Previous->Previous; - // Ensure we tag all identifiers in method declarations as - // TT_SelectorName. - bool UnknownIdentifierInMethodDeclaration = - Line.startsWith(TT_ObjCMethodSpecifier) && - Tok->Previous->is(tok::identifier) && Tok->Previous->is(TT_Unknown); - if (!BeforePrevious || - // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen. - !(BeforePrevious->is(TT_CastRParen) || - (BeforePrevious->is(TT_ObjCMethodExpr) && - BeforePrevious->is(tok::colon))) || - BeforePrevious->is(tok::r_square) || - Contexts.back().LongestObjCSelectorName == 0 || - UnknownIdentifierInMethodDeclaration) { - Tok->Previous->Type = TT_SelectorName; - if (!Contexts.back().FirstObjCSelectorName) - Contexts.back().FirstObjCSelectorName = Tok->Previous; - else if (Tok->Previous->ColumnWidth > - Contexts.back().LongestObjCSelectorName) - Contexts.back().LongestObjCSelectorName = - Tok->Previous->ColumnWidth; - Tok->Previous->ParameterIndex = - Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts; - ++Contexts.back().FirstObjCSelectorName->ObjCSelectorNameParts; - } - } else if (Contexts.back().ColonIsForRangeExpr) { - Tok->Type = TT_RangeBasedForLoopColon; - } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) { - Tok->Type = TT_BitFieldColon; - } else if (Contexts.size() == 1 && - !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) { - if (Tok->getPreviousNonComment()->isOneOf(tok::r_paren, - tok::kw_noexcept)) - Tok->Type = TT_CtorInitializerColon; - else - Tok->Type = TT_InheritanceColon; - } else if (canBeObjCSelectorComponent(*Tok->Previous) && Tok->Next && - (Tok->Next->isOneOf(tok::r_paren, tok::comma) || - (canBeObjCSelectorComponent(*Tok->Next) && Tok->Next->Next && - Tok->Next->Next->is(tok::colon)))) { - // This handles a special macro in ObjC code where selectors including - // the colon are passed as macro arguments. - Tok->Type = TT_ObjCMethodExpr; - } else if (Contexts.back().ContextKind == tok::l_paren) { - Tok->Type = TT_InlineASMColon; - } - break; - case tok::pipe: - case tok::amp: - // | and & in declarations/type expressions represent union and - // intersection types, respectively. - if (Style.Language == FormatStyle::LK_JavaScript && - !Contexts.back().IsExpression) - Tok->Type = TT_JsTypeOperator; - break; - case tok::kw_if: - case tok::kw_while: - if (Tok->is(tok::kw_if) && CurrentToken && - CurrentToken->is(tok::kw_constexpr)) - next(); - if (CurrentToken && CurrentToken->is(tok::l_paren)) { - next(); - if (!parseParens(/*LookForDecls=*/true)) - return false; - } - break; - case tok::kw_for: - if (Style.Language == FormatStyle::LK_JavaScript) { - // x.for and {for: ...} - if ((Tok->Previous && Tok->Previous->is(tok::period)) || - (Tok->Next && Tok->Next->is(tok::colon))) - break; - // JS' for await ( ... - if (CurrentToken && CurrentToken->is(Keywords.kw_await)) - next(); - } - Contexts.back().ColonIsForRangeExpr = true; - next(); - if (!parseParens()) - return false; - break; - case tok::l_paren: - // When faced with 'operator()()', the kw_operator handler incorrectly - // marks the first l_paren as a OverloadedOperatorLParen. Here, we make - // the first two parens OverloadedOperators and the second l_paren an - // OverloadedOperatorLParen. - if (Tok->Previous && Tok->Previous->is(tok::r_paren) && - Tok->Previous->MatchingParen && - Tok->Previous->MatchingParen->is(TT_OverloadedOperatorLParen)) { - Tok->Previous->Type = TT_OverloadedOperator; - Tok->Previous->MatchingParen->Type = TT_OverloadedOperator; - Tok->Type = TT_OverloadedOperatorLParen; - } - - if (!parseParens()) - return false; - if (Line.MustBeDeclaration && Contexts.size() == 1 && - !Contexts.back().IsExpression && !Line.startsWith(TT_ObjCProperty) && - (!Tok->Previous || - !Tok->Previous->isOneOf(tok::kw_decltype, tok::kw___attribute, - TT_LeadingJavaAnnotation))) - Line.MightBeFunctionDecl = true; - break; - case tok::l_square: - if (!parseSquare()) - return false; - break; - case tok::l_brace: - if (Style.Language == FormatStyle::LK_TextProto) { - FormatToken *Previous = Tok->getPreviousNonComment(); - if (Previous && Previous->Type != TT_DictLiteral) - Previous->Type = TT_SelectorName; - } - if (!parseBrace()) - return false; - break; - case tok::less: - if (parseAngle()) { - Tok->Type = TT_TemplateOpener; - // In TT_Proto, we must distignuish between: - // map<key, value> - // msg < item: data > - // msg: < item: data > - // In TT_TextProto, map<key, value> does not occur. - if (Style.Language == FormatStyle::LK_TextProto || - (Style.Language == FormatStyle::LK_Proto && Tok->Previous && - Tok->Previous->isOneOf(TT_SelectorName, TT_DictLiteral))) { - Tok->Type = TT_DictLiteral; - FormatToken *Previous = Tok->getPreviousNonComment(); - if (Previous && Previous->Type != TT_DictLiteral) - Previous->Type = TT_SelectorName; - } - } else { - Tok->Type = TT_BinaryOperator; - NonTemplateLess.insert(Tok); - CurrentToken = Tok; - next(); - } - break; - case tok::r_paren: - case tok::r_square: - return false; - case tok::r_brace: - // Lines can start with '}'. - if (Tok->Previous) - return false; - break; - case tok::greater: - if (Style.Language != FormatStyle::LK_TextProto) - Tok->Type = TT_BinaryOperator; - break; - case tok::kw_operator: - if (Style.Language == FormatStyle::LK_TextProto || - Style.Language == FormatStyle::LK_Proto) - break; - while (CurrentToken && - !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) { - if (CurrentToken->isOneOf(tok::star, tok::amp)) - CurrentToken->Type = TT_PointerOrReference; - consumeToken(); - if (CurrentToken && - CurrentToken->Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator, - tok::comma)) - CurrentToken->Previous->Type = TT_OverloadedOperator; - } - if (CurrentToken) { - CurrentToken->Type = TT_OverloadedOperatorLParen; - if (CurrentToken->Previous->is(TT_BinaryOperator)) - CurrentToken->Previous->Type = TT_OverloadedOperator; - } - break; - case tok::question: - if (Style.Language == FormatStyle::LK_JavaScript && Tok->Next && - Tok->Next->isOneOf(tok::semi, tok::comma, tok::colon, tok::r_paren, - tok::r_brace)) { - // Question marks before semicolons, colons, etc. indicate optional - // types (fields, parameters), e.g. - // function(x?: string, y?) {...} - // class X { y?; } - Tok->Type = TT_JsTypeOptionalQuestion; - break; - } - // Declarations cannot be conditional expressions, this can only be part - // of a type declaration. - if (Line.MustBeDeclaration && !Contexts.back().IsExpression && - Style.Language == FormatStyle::LK_JavaScript) - break; - parseConditional(); - break; - case tok::kw_template: - parseTemplateDeclaration(); - break; - case tok::comma: - if (Contexts.back().InCtorInitializer) - Tok->Type = TT_CtorInitializerComma; - else if (Contexts.back().InInheritanceList) - Tok->Type = TT_InheritanceComma; - else if (Contexts.back().FirstStartOfName && - (Contexts.size() == 1 || Line.startsWith(tok::kw_for))) { - Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true; - Line.IsMultiVariableDeclStmt = true; - } - if (Contexts.back().IsForEachMacro) - Contexts.back().IsExpression = true; - break; - case tok::identifier: - if (Tok->isOneOf(Keywords.kw___has_include, - Keywords.kw___has_include_next)) { - parseHasInclude(); - } - break; - default: - break; - } - return true; - } - - void parseIncludeDirective() { - if (CurrentToken && CurrentToken->is(tok::less)) { - next(); - while (CurrentToken) { - // Mark tokens up to the trailing line comments as implicit string - // literals. - if (CurrentToken->isNot(tok::comment) && - !CurrentToken->TokenText.startswith("//")) - CurrentToken->Type = TT_ImplicitStringLiteral; - next(); - } - } - } - - void parseWarningOrError() { - next(); - // We still want to format the whitespace left of the first token of the - // warning or error. - next(); - while (CurrentToken) { - CurrentToken->Type = TT_ImplicitStringLiteral; - next(); - } - } - - void parsePragma() { - next(); // Consume "pragma". - if (CurrentToken && - CurrentToken->isOneOf(Keywords.kw_mark, Keywords.kw_option)) { - bool IsMark = CurrentToken->is(Keywords.kw_mark); - next(); // Consume "mark". - next(); // Consume first token (so we fix leading whitespace). - while (CurrentToken) { - if (IsMark || CurrentToken->Previous->is(TT_BinaryOperator)) - CurrentToken->Type = TT_ImplicitStringLiteral; - next(); - } - } - } - - void parseHasInclude() { - if (!CurrentToken || !CurrentToken->is(tok::l_paren)) - return; - next(); // '(' - parseIncludeDirective(); - next(); // ')' - } - - LineType parsePreprocessorDirective() { - bool IsFirstToken = CurrentToken->IsFirst; - LineType Type = LT_PreprocessorDirective; - next(); - if (!CurrentToken) - return Type; - - if (Style.Language == FormatStyle::LK_JavaScript && IsFirstToken) { - // JavaScript files can contain shebang lines of the form: - // #!/usr/bin/env node - // Treat these like C++ #include directives. - while (CurrentToken) { - // Tokens cannot be comments here. - CurrentToken->Type = TT_ImplicitStringLiteral; - next(); - } - return LT_ImportStatement; - } - - if (CurrentToken->Tok.is(tok::numeric_constant)) { - CurrentToken->SpacesRequiredBefore = 1; - return Type; - } - // Hashes in the middle of a line can lead to any strange token - // sequence. - if (!CurrentToken->Tok.getIdentifierInfo()) - return Type; - switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) { - case tok::pp_include: - case tok::pp_include_next: - case tok::pp_import: - next(); - parseIncludeDirective(); - Type = LT_ImportStatement; - break; - case tok::pp_error: - case tok::pp_warning: - parseWarningOrError(); - break; - case tok::pp_pragma: - parsePragma(); - break; - case tok::pp_if: - case tok::pp_elif: - Contexts.back().IsExpression = true; - parseLine(); - break; - default: - break; - } - while (CurrentToken) { - FormatToken *Tok = CurrentToken; - next(); - if (Tok->is(tok::l_paren)) - parseParens(); - else if (Tok->isOneOf(Keywords.kw___has_include, - Keywords.kw___has_include_next)) - parseHasInclude(); - } - return Type; - } - -public: - LineType parseLine() { - NonTemplateLess.clear(); - if (CurrentToken->is(tok::hash)) - return parsePreprocessorDirective(); - - // Directly allow to 'import <string-literal>' to support protocol buffer - // definitions (github.com/google/protobuf) or missing "#" (either way we - // should not break the line). - IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo(); - if ((Style.Language == FormatStyle::LK_Java && - CurrentToken->is(Keywords.kw_package)) || - (Info && Info->getPPKeywordID() == tok::pp_import && - CurrentToken->Next && - CurrentToken->Next->isOneOf(tok::string_literal, tok::identifier, - tok::kw_static))) { - next(); - parseIncludeDirective(); - return LT_ImportStatement; - } - - // If this line starts and ends in '<' and '>', respectively, it is likely - // part of "#define <a/b.h>". - if (CurrentToken->is(tok::less) && Line.Last->is(tok::greater)) { - parseIncludeDirective(); - return LT_ImportStatement; - } - - // In .proto files, top-level options are very similar to import statements - // and should not be line-wrapped. - if (Style.Language == FormatStyle::LK_Proto && Line.Level == 0 && - CurrentToken->is(Keywords.kw_option)) { - next(); - if (CurrentToken && CurrentToken->is(tok::identifier)) - return LT_ImportStatement; - } - - bool KeywordVirtualFound = false; - bool ImportStatement = false; - - // import {...} from '...'; - if (Style.Language == FormatStyle::LK_JavaScript && - CurrentToken->is(Keywords.kw_import)) - ImportStatement = true; - - while (CurrentToken) { - if (CurrentToken->is(tok::kw_virtual)) - KeywordVirtualFound = true; - if (Style.Language == FormatStyle::LK_JavaScript) { - // export {...} from '...'; - // An export followed by "from 'some string';" is a re-export from - // another module identified by a URI and is treated as a - // LT_ImportStatement (i.e. prevent wraps on it for long URIs). - // Just "export {...};" or "export class ..." should not be treated as - // an import in this sense. - if (Line.First->is(tok::kw_export) && - CurrentToken->is(Keywords.kw_from) && CurrentToken->Next && - CurrentToken->Next->isStringLiteral()) - ImportStatement = true; - if (isClosureImportStatement(*CurrentToken)) - ImportStatement = true; - } - if (!consumeToken()) - return LT_Invalid; - } - if (KeywordVirtualFound) - return LT_VirtualFunctionDecl; - if (ImportStatement) - return LT_ImportStatement; - - if (Line.startsWith(TT_ObjCMethodSpecifier)) { - if (Contexts.back().FirstObjCSelectorName) - Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = - Contexts.back().LongestObjCSelectorName; - return LT_ObjCMethodDecl; - } - - return LT_Other; - } - -private: - bool isClosureImportStatement(const FormatToken &Tok) { - // FIXME: Closure-library specific stuff should not be hard-coded but be - // configurable. - return Tok.TokenText == "goog" && Tok.Next && Tok.Next->is(tok::period) && - Tok.Next->Next && - (Tok.Next->Next->TokenText == "module" || - Tok.Next->Next->TokenText == "provide" || - Tok.Next->Next->TokenText == "require" || - Tok.Next->Next->TokenText == "requireType" || - Tok.Next->Next->TokenText == "forwardDeclare") && - Tok.Next->Next->Next && Tok.Next->Next->Next->is(tok::l_paren); - } - - void resetTokenMetadata(FormatToken *Token) { - if (!Token) - return; - - // Reset token type in case we have already looked at it and then - // recovered from an error (e.g. failure to find the matching >). - if (!CurrentToken->isOneOf(TT_LambdaLSquare, TT_ForEachMacro, - TT_FunctionLBrace, TT_ImplicitStringLiteral, - TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow, - TT_OverloadedOperator, TT_RegexLiteral, - TT_TemplateString, TT_ObjCStringLiteral)) - CurrentToken->Type = TT_Unknown; - CurrentToken->Role.reset(); - CurrentToken->MatchingParen = nullptr; - CurrentToken->FakeLParens.clear(); - CurrentToken->FakeRParens = 0; - } - - void next() { - if (CurrentToken) { - CurrentToken->NestingLevel = Contexts.size() - 1; - CurrentToken->BindingStrength = Contexts.back().BindingStrength; - modifyContext(*CurrentToken); - determineTokenType(*CurrentToken); - CurrentToken = CurrentToken->Next; - } - - resetTokenMetadata(CurrentToken); - } - - /// A struct to hold information valid in a specific context, e.g. - /// a pair of parenthesis. - struct Context { - Context(tok::TokenKind ContextKind, unsigned BindingStrength, - bool IsExpression) - : ContextKind(ContextKind), BindingStrength(BindingStrength), - IsExpression(IsExpression) {} - - tok::TokenKind ContextKind; - unsigned BindingStrength; - bool IsExpression; - unsigned LongestObjCSelectorName = 0; - bool ColonIsForRangeExpr = false; - bool ColonIsDictLiteral = false; - bool ColonIsObjCMethodExpr = false; - FormatToken *FirstObjCSelectorName = nullptr; - FormatToken *FirstStartOfName = nullptr; - bool CanBeExpression = true; - bool InTemplateArgument = false; - bool InCtorInitializer = false; - bool InInheritanceList = false; - bool CaretFound = false; - bool IsForEachMacro = false; - bool InCpp11AttributeSpecifier = false; - }; - - /// Puts a new \c Context onto the stack \c Contexts for the lifetime - /// of each instance. - struct ScopedContextCreator { - AnnotatingParser &P; - - ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind, - unsigned Increase) - : P(P) { - P.Contexts.push_back(Context(ContextKind, - P.Contexts.back().BindingStrength + Increase, - P.Contexts.back().IsExpression)); - } - - ~ScopedContextCreator() { P.Contexts.pop_back(); } - }; - - void modifyContext(const FormatToken &Current) { - if (Current.getPrecedence() == prec::Assignment && - !Line.First->isOneOf(tok::kw_template, tok::kw_using, tok::kw_return) && - // Type aliases use `type X = ...;` in TypeScript and can be exported - // using `export type ...`. - !(Style.Language == FormatStyle::LK_JavaScript && - (Line.startsWith(Keywords.kw_type, tok::identifier) || - Line.startsWith(tok::kw_export, Keywords.kw_type, - tok::identifier))) && - (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) { - Contexts.back().IsExpression = true; - if (!Line.startsWith(TT_UnaryOperator)) { - for (FormatToken *Previous = Current.Previous; - Previous && Previous->Previous && - !Previous->Previous->isOneOf(tok::comma, tok::semi); - Previous = Previous->Previous) { - if (Previous->isOneOf(tok::r_square, tok::r_paren)) { - Previous = Previous->MatchingParen; - if (!Previous) - break; - } - if (Previous->opensScope()) - break; - if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator) && - Previous->isOneOf(tok::star, tok::amp, tok::ampamp) && - Previous->Previous && Previous->Previous->isNot(tok::equal)) - Previous->Type = TT_PointerOrReference; - } - } - } else if (Current.is(tok::lessless) && - (!Current.Previous || !Current.Previous->is(tok::kw_operator))) { - Contexts.back().IsExpression = true; - } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) { - Contexts.back().IsExpression = true; - } else if (Current.is(TT_TrailingReturnArrow)) { - Contexts.back().IsExpression = false; - } else if (Current.is(TT_LambdaArrow) || Current.is(Keywords.kw_assert)) { - Contexts.back().IsExpression = Style.Language == FormatStyle::LK_Java; - } else if (Current.Previous && - Current.Previous->is(TT_CtorInitializerColon)) { - Contexts.back().IsExpression = true; - Contexts.back().InCtorInitializer = true; - } else if (Current.Previous && Current.Previous->is(TT_InheritanceColon)) { - Contexts.back().InInheritanceList = true; - } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) { - for (FormatToken *Previous = Current.Previous; - Previous && Previous->isOneOf(tok::star, tok::amp); - Previous = Previous->Previous) - Previous->Type = TT_PointerOrReference; - if (Line.MustBeDeclaration && !Contexts.front().InCtorInitializer) - Contexts.back().IsExpression = false; - } else if (Current.is(tok::kw_new)) { - Contexts.back().CanBeExpression = false; - } else if (Current.isOneOf(tok::semi, tok::exclaim)) { - // This should be the condition or increment in a for-loop. - Contexts.back().IsExpression = true; - } - } - - void determineTokenType(FormatToken &Current) { - if (!Current.is(TT_Unknown)) - // The token type is already known. - return; - - if (Style.Language == FormatStyle::LK_JavaScript) { - if (Current.is(tok::exclaim)) { - if (Current.Previous && - (Current.Previous->isOneOf(tok::identifier, tok::kw_namespace, - tok::r_paren, tok::r_square, - tok::r_brace) || - Current.Previous->Tok.isLiteral())) { - Current.Type = TT_JsNonNullAssertion; - return; - } - if (Current.Next && - Current.Next->isOneOf(TT_BinaryOperator, Keywords.kw_as)) { - Current.Type = TT_JsNonNullAssertion; - return; - } - } - } - - // Line.MightBeFunctionDecl can only be true after the parentheses of a - // function declaration have been found. In this case, 'Current' is a - // trailing token of this declaration and thus cannot be a name. - if (Current.is(Keywords.kw_instanceof)) { - Current.Type = TT_BinaryOperator; - } else if (isStartOfName(Current) && - (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) { - Contexts.back().FirstStartOfName = &Current; - Current.Type = TT_StartOfName; - } else if (Current.is(tok::semi)) { - // Reset FirstStartOfName after finding a semicolon so that a for loop - // with multiple increment statements is not confused with a for loop - // having multiple variable declarations. - Contexts.back().FirstStartOfName = nullptr; - } else if (Current.isOneOf(tok::kw_auto, tok::kw___auto_type)) { - AutoFound = true; - } else if (Current.is(tok::arrow) && - Style.Language == FormatStyle::LK_Java) { - Current.Type = TT_LambdaArrow; - } else if (Current.is(tok::arrow) && AutoFound && Line.MustBeDeclaration && - Current.NestingLevel == 0) { - Current.Type = TT_TrailingReturnArrow; - } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) { - Current.Type = determineStarAmpUsage(Current, - Contexts.back().CanBeExpression && - Contexts.back().IsExpression, - Contexts.back().InTemplateArgument); - } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) { - Current.Type = determinePlusMinusCaretUsage(Current); - if (Current.is(TT_UnaryOperator) && Current.is(tok::caret)) - Contexts.back().CaretFound = true; - } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) { - Current.Type = determineIncrementUsage(Current); - } else if (Current.isOneOf(tok::exclaim, tok::tilde)) { - Current.Type = TT_UnaryOperator; - } else if (Current.is(tok::question)) { - if (Style.Language == FormatStyle::LK_JavaScript && - Line.MustBeDeclaration && !Contexts.back().IsExpression) { - // In JavaScript, `interface X { foo?(): bar; }` is an optional method - // on the interface, not a ternary expression. - Current.Type = TT_JsTypeOptionalQuestion; - } else { - Current.Type = TT_ConditionalExpr; - } - } else if (Current.isBinaryOperator() && - (!Current.Previous || Current.Previous->isNot(tok::l_square)) && - (!Current.is(tok::greater) && - Style.Language != FormatStyle::LK_TextProto)) { - Current.Type = TT_BinaryOperator; - } else if (Current.is(tok::comment)) { - if (Current.TokenText.startswith("/*")) { - if (Current.TokenText.endswith("*/")) - Current.Type = TT_BlockComment; - else - // The lexer has for some reason determined a comment here. But we - // cannot really handle it, if it isn't properly terminated. - Current.Tok.setKind(tok::unknown); - } else { - Current.Type = TT_LineComment; - } - } else if (Current.is(tok::r_paren)) { - if (rParenEndsCast(Current)) - Current.Type = TT_CastRParen; - if (Current.MatchingParen && Current.Next && - !Current.Next->isBinaryOperator() && - !Current.Next->isOneOf(tok::semi, tok::colon, tok::l_brace, - tok::comma, tok::period, tok::arrow, - tok::coloncolon)) - if (FormatToken *AfterParen = Current.MatchingParen->Next) { - // Make sure this isn't the return type of an Obj-C block declaration - if (AfterParen->Tok.isNot(tok::caret)) { - if (FormatToken *BeforeParen = Current.MatchingParen->Previous) - if (BeforeParen->is(tok::identifier) && - BeforeParen->TokenText == BeforeParen->TokenText.upper() && - (!BeforeParen->Previous || - BeforeParen->Previous->ClosesTemplateDeclaration)) - Current.Type = TT_FunctionAnnotationRParen; - } - } - } else if (Current.is(tok::at) && Current.Next && - Style.Language != FormatStyle::LK_JavaScript && - Style.Language != FormatStyle::LK_Java) { - // In Java & JavaScript, "@..." is a decorator or annotation. In ObjC, it - // marks declarations and properties that need special formatting. - switch (Current.Next->Tok.getObjCKeywordID()) { - case tok::objc_interface: - case tok::objc_implementation: - case tok::objc_protocol: - Current.Type = TT_ObjCDecl; - break; - case tok::objc_property: - Current.Type = TT_ObjCProperty; - break; - default: - break; - } - } else if (Current.is(tok::period)) { - FormatToken *PreviousNoComment = Current.getPreviousNonComment(); - if (PreviousNoComment && - PreviousNoComment->isOneOf(tok::comma, tok::l_brace)) - Current.Type = TT_DesignatedInitializerPeriod; - else if (Style.Language == FormatStyle::LK_Java && Current.Previous && - Current.Previous->isOneOf(TT_JavaAnnotation, - TT_LeadingJavaAnnotation)) { - Current.Type = Current.Previous->Type; - } - } else if (canBeObjCSelectorComponent(Current) && - // FIXME(bug 36976): ObjC return types shouldn't use TT_CastRParen. - Current.Previous && Current.Previous->is(TT_CastRParen) && - Current.Previous->MatchingParen && - Current.Previous->MatchingParen->Previous && - Current.Previous->MatchingParen->Previous->is( - TT_ObjCMethodSpecifier)) { - // This is the first part of an Objective-C selector name. (If there's no - // colon after this, this is the only place which annotates the identifier - // as a selector.) - Current.Type = TT_SelectorName; - } else if (Current.isOneOf(tok::identifier, tok::kw_const) && - Current.Previous && - !Current.Previous->isOneOf(tok::equal, tok::at) && - Line.MightBeFunctionDecl && Contexts.size() == 1) { - // Line.MightBeFunctionDecl can only be true after the parentheses of a - // function declaration have been found. - Current.Type = TT_TrailingAnnotation; - } else if ((Style.Language == FormatStyle::LK_Java || - Style.Language == FormatStyle::LK_JavaScript) && - Current.Previous) { - if (Current.Previous->is(tok::at) && - Current.isNot(Keywords.kw_interface)) { - const FormatToken &AtToken = *Current.Previous; - const FormatToken *Previous = AtToken.getPreviousNonComment(); - if (!Previous || Previous->is(TT_LeadingJavaAnnotation)) - Current.Type = TT_LeadingJavaAnnotation; - else - Current.Type = TT_JavaAnnotation; - } else if (Current.Previous->is(tok::period) && - Current.Previous->isOneOf(TT_JavaAnnotation, - TT_LeadingJavaAnnotation)) { - Current.Type = Current.Previous->Type; - } - } - } - - /// Take a guess at whether \p Tok starts a name of a function or - /// variable declaration. - /// - /// This is a heuristic based on whether \p Tok is an identifier following - /// something that is likely a type. - bool isStartOfName(const FormatToken &Tok) { - if (Tok.isNot(tok::identifier) || !Tok.Previous) - return false; - - if (Tok.Previous->isOneOf(TT_LeadingJavaAnnotation, Keywords.kw_instanceof, - Keywords.kw_as)) - return false; - if (Style.Language == FormatStyle::LK_JavaScript && - Tok.Previous->is(Keywords.kw_in)) - return false; - - // Skip "const" as it does not have an influence on whether this is a name. - FormatToken *PreviousNotConst = Tok.getPreviousNonComment(); - while (PreviousNotConst && PreviousNotConst->is(tok::kw_const)) - PreviousNotConst = PreviousNotConst->getPreviousNonComment(); - - if (!PreviousNotConst) - return false; - - bool IsPPKeyword = PreviousNotConst->is(tok::identifier) && - PreviousNotConst->Previous && - PreviousNotConst->Previous->is(tok::hash); - - if (PreviousNotConst->is(TT_TemplateCloser)) - return PreviousNotConst && PreviousNotConst->MatchingParen && - PreviousNotConst->MatchingParen->Previous && - PreviousNotConst->MatchingParen->Previous->isNot(tok::period) && - PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template); - - if (PreviousNotConst->is(tok::r_paren) && PreviousNotConst->MatchingParen && - PreviousNotConst->MatchingParen->Previous && - PreviousNotConst->MatchingParen->Previous->is(tok::kw_decltype)) - return true; - - return (!IsPPKeyword && - PreviousNotConst->isOneOf(tok::identifier, tok::kw_auto)) || - PreviousNotConst->is(TT_PointerOrReference) || - PreviousNotConst->isSimpleTypeSpecifier(); - } - - /// Determine whether ')' is ending a cast. - bool rParenEndsCast(const FormatToken &Tok) { - // C-style casts are only used in C++ and Java. - if (!Style.isCpp() && Style.Language != FormatStyle::LK_Java) - return false; - - // Empty parens aren't casts and there are no casts at the end of the line. - if (Tok.Previous == Tok.MatchingParen || !Tok.Next || !Tok.MatchingParen) - return false; - - FormatToken *LeftOfParens = Tok.MatchingParen->getPreviousNonComment(); - if (LeftOfParens) { - // If there is a closing parenthesis left of the current parentheses, - // look past it as these might be chained casts. - if (LeftOfParens->is(tok::r_paren)) { - if (!LeftOfParens->MatchingParen || - !LeftOfParens->MatchingParen->Previous) - return false; - LeftOfParens = LeftOfParens->MatchingParen->Previous; - } - - // If there is an identifier (or with a few exceptions a keyword) right - // before the parentheses, this is unlikely to be a cast. - if (LeftOfParens->Tok.getIdentifierInfo() && - !LeftOfParens->isOneOf(Keywords.kw_in, tok::kw_return, tok::kw_case, - tok::kw_delete)) - return false; - - // Certain other tokens right before the parentheses are also signals that - // this cannot be a cast. - if (LeftOfParens->isOneOf(tok::at, tok::r_square, TT_OverloadedOperator, - TT_TemplateCloser, tok::ellipsis)) - return false; - } - - if (Tok.Next->is(tok::question)) - return false; - - // As Java has no function types, a "(" after the ")" likely means that this - // is a cast. - if (Style.Language == FormatStyle::LK_Java && Tok.Next->is(tok::l_paren)) - return true; - - // If a (non-string) literal follows, this is likely a cast. - if (Tok.Next->isNot(tok::string_literal) && - (Tok.Next->Tok.isLiteral() || - Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof))) - return true; - - // Heuristically try to determine whether the parentheses contain a type. - bool ParensAreType = - !Tok.Previous || - Tok.Previous->isOneOf(TT_PointerOrReference, TT_TemplateCloser) || - Tok.Previous->isSimpleTypeSpecifier(); - bool ParensCouldEndDecl = - Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace, tok::greater); - if (ParensAreType && !ParensCouldEndDecl) - return true; - - // At this point, we heuristically assume that there are no casts at the - // start of the line. We assume that we have found most cases where there - // are by the logic above, e.g. "(void)x;". - if (!LeftOfParens) - return false; - - // Certain token types inside the parentheses mean that this can't be a - // cast. - for (const FormatToken *Token = Tok.MatchingParen->Next; Token != &Tok; - Token = Token->Next) - if (Token->is(TT_BinaryOperator)) - return false; - - // If the following token is an identifier or 'this', this is a cast. All - // cases where this can be something else are handled above. - if (Tok.Next->isOneOf(tok::identifier, tok::kw_this)) - return true; - - if (!Tok.Next->Next) - return false; - - // If the next token after the parenthesis is a unary operator, assume - // that this is cast, unless there are unexpected tokens inside the - // parenthesis. - bool NextIsUnary = - Tok.Next->isUnaryOperator() || Tok.Next->isOneOf(tok::amp, tok::star); - if (!NextIsUnary || Tok.Next->is(tok::plus) || - !Tok.Next->Next->isOneOf(tok::identifier, tok::numeric_constant)) - return false; - // Search for unexpected tokens. - for (FormatToken *Prev = Tok.Previous; Prev != Tok.MatchingParen; - Prev = Prev->Previous) { - if (!Prev->isOneOf(tok::kw_const, tok::identifier, tok::coloncolon)) - return false; - } - return true; - } - - /// Return the type of the given token assuming it is * or &. - TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression, - bool InTemplateArgument) { - if (Style.Language == FormatStyle::LK_JavaScript) - return TT_BinaryOperator; - - const FormatToken *PrevToken = Tok.getPreviousNonComment(); - if (!PrevToken) - return TT_UnaryOperator; - - const FormatToken *NextToken = Tok.getNextNonComment(); - if (!NextToken || - NextToken->isOneOf(tok::arrow, tok::equal, tok::kw_const) || - (NextToken->is(tok::l_brace) && !NextToken->getNextNonComment())) - return TT_PointerOrReference; - - if (PrevToken->is(tok::coloncolon)) - return TT_PointerOrReference; - - if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace, - tok::comma, tok::semi, tok::kw_return, tok::colon, - tok::equal, tok::kw_delete, tok::kw_sizeof, - tok::kw_throw) || - PrevToken->isOneOf(TT_BinaryOperator, TT_ConditionalExpr, - TT_UnaryOperator, TT_CastRParen)) - return TT_UnaryOperator; - - if (NextToken->is(tok::l_square) && NextToken->isNot(TT_LambdaLSquare)) - return TT_PointerOrReference; - if (NextToken->is(tok::kw_operator) && !IsExpression) - return TT_PointerOrReference; - if (NextToken->isOneOf(tok::comma, tok::semi)) - return TT_PointerOrReference; - - if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen) { - FormatToken *TokenBeforeMatchingParen = - PrevToken->MatchingParen->getPreviousNonComment(); - if (TokenBeforeMatchingParen && - TokenBeforeMatchingParen->isOneOf(tok::kw_typeof, tok::kw_decltype)) - return TT_PointerOrReference; - } - - if (PrevToken->Tok.isLiteral() || - PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true, - tok::kw_false, tok::r_brace) || - NextToken->Tok.isLiteral() || - NextToken->isOneOf(tok::kw_true, tok::kw_false) || - NextToken->isUnaryOperator() || - // If we know we're in a template argument, there are no named - // declarations. Thus, having an identifier on the right-hand side - // indicates a binary operator. - (InTemplateArgument && NextToken->Tok.isAnyIdentifier())) - return TT_BinaryOperator; - - // "&&(" is quite unlikely to be two successive unary "&". - if (Tok.is(tok::ampamp) && NextToken && NextToken->is(tok::l_paren)) - return TT_BinaryOperator; - - // This catches some cases where evaluation order is used as control flow: - // aaa && aaa->f(); - const FormatToken *NextNextToken = NextToken->getNextNonComment(); - if (NextNextToken && NextNextToken->is(tok::arrow)) - return TT_BinaryOperator; - - // It is very unlikely that we are going to find a pointer or reference type - // definition on the RHS of an assignment. - if (IsExpression && !Contexts.back().CaretFound) - return TT_BinaryOperator; - - return TT_PointerOrReference; - } - - TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) { - const FormatToken *PrevToken = Tok.getPreviousNonComment(); - if (!PrevToken) - return TT_UnaryOperator; - - if (PrevToken->isOneOf(TT_CastRParen, TT_UnaryOperator)) - // This must be a sequence of leading unary operators. - return TT_UnaryOperator; - - // Use heuristics to recognize unary operators. - if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square, - tok::question, tok::colon, tok::kw_return, - tok::kw_case, tok::at, tok::l_brace)) - return TT_UnaryOperator; - - // There can't be two consecutive binary operators. - if (PrevToken->is(TT_BinaryOperator)) - return TT_UnaryOperator; - - // Fall back to marking the token as binary operator. - return TT_BinaryOperator; - } - - /// Determine whether ++/-- are pre- or post-increments/-decrements. - TokenType determineIncrementUsage(const FormatToken &Tok) { - const FormatToken *PrevToken = Tok.getPreviousNonComment(); - if (!PrevToken || PrevToken->is(TT_CastRParen)) - return TT_UnaryOperator; - if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier)) - return TT_TrailingUnaryOperator; - - return TT_UnaryOperator; - } - - SmallVector<Context, 8> Contexts; - - const FormatStyle &Style; - AnnotatedLine &Line; - FormatToken *CurrentToken; - bool AutoFound; - const AdditionalKeywords &Keywords; - - // Set of "<" tokens that do not open a template parameter list. If parseAngle - // determines that a specific token can't be a template opener, it will make - // same decision irrespective of the decisions for tokens leading up to it. - // Store this information to prevent this from causing exponential runtime. - llvm::SmallPtrSet<FormatToken *, 16> NonTemplateLess; -}; - -static const int PrecedenceUnaryOperator = prec::PointerToMember + 1; -static const int PrecedenceArrowAndPeriod = prec::PointerToMember + 2; - -/// Parses binary expressions by inserting fake parenthesis based on -/// operator precedence. -class ExpressionParser { -public: - ExpressionParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, - AnnotatedLine &Line) - : Style(Style), Keywords(Keywords), Current(Line.First) {} - - /// Parse expressions with the given operator precedence. - void parse(int Precedence = 0) { - // Skip 'return' and ObjC selector colons as they are not part of a binary - // expression. - while (Current && (Current->is(tok::kw_return) || - (Current->is(tok::colon) && - Current->isOneOf(TT_ObjCMethodExpr, TT_DictLiteral)))) - next(); - - if (!Current || Precedence > PrecedenceArrowAndPeriod) - return; - - // Conditional expressions need to be parsed separately for proper nesting. - if (Precedence == prec::Conditional) { - parseConditionalExpr(); - return; - } - - // Parse unary operators, which all have a higher precedence than binary - // operators. - if (Precedence == PrecedenceUnaryOperator) { - parseUnaryOperator(); - return; - } - - FormatToken *Start = Current; - FormatToken *LatestOperator = nullptr; - unsigned OperatorIndex = 0; - - while (Current) { - // Consume operators with higher precedence. - parse(Precedence + 1); - - int CurrentPrecedence = getCurrentPrecedence(); - - if (Current && Current->is(TT_SelectorName) && - Precedence == CurrentPrecedence) { - if (LatestOperator) - addFakeParenthesis(Start, prec::Level(Precedence)); - Start = Current; - } - - // At the end of the line or when an operator with higher precedence is - // found, insert fake parenthesis and return. - if (!Current || - (Current->closesScope() && - (Current->MatchingParen || Current->is(TT_TemplateString))) || - (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) || - (CurrentPrecedence == prec::Conditional && - Precedence == prec::Assignment && Current->is(tok::colon))) { - break; - } - - // Consume scopes: (), [], <> and {} - if (Current->opensScope()) { - // In fragment of a JavaScript template string can look like '}..${' and - // thus close a scope and open a new one at the same time. - while (Current && (!Current->closesScope() || Current->opensScope())) { - next(); - parse(); - } - next(); - } else { - // Operator found. - if (CurrentPrecedence == Precedence) { - if (LatestOperator) - LatestOperator->NextOperator = Current; - LatestOperator = Current; - Current->OperatorIndex = OperatorIndex; - ++OperatorIndex; - } - next(/*SkipPastLeadingComments=*/Precedence > 0); - } - } - - if (LatestOperator && (Current || Precedence > 0)) { - // LatestOperator->LastOperator = true; - if (Precedence == PrecedenceArrowAndPeriod) { - // Call expressions don't have a binary operator precedence. - addFakeParenthesis(Start, prec::Unknown); - } else { - addFakeParenthesis(Start, prec::Level(Precedence)); - } - } - } - -private: - /// Gets the precedence (+1) of the given token for binary operators - /// and other tokens that we treat like binary operators. - int getCurrentPrecedence() { - if (Current) { - const FormatToken *NextNonComment = Current->getNextNonComment(); - if (Current->is(TT_ConditionalExpr)) - return prec::Conditional; - if (NextNonComment && Current->is(TT_SelectorName) && - (NextNonComment->isOneOf(TT_DictLiteral, TT_JsTypeColon) || - ((Style.Language == FormatStyle::LK_Proto || - Style.Language == FormatStyle::LK_TextProto) && - NextNonComment->is(tok::less)))) - return prec::Assignment; - if (Current->is(TT_JsComputedPropertyName)) - return prec::Assignment; - if (Current->is(TT_LambdaArrow)) - return prec::Comma; - if (Current->is(TT_JsFatArrow)) - return prec::Assignment; - if (Current->isOneOf(tok::semi, TT_InlineASMColon, TT_SelectorName) || - (Current->is(tok::comment) && NextNonComment && - NextNonComment->is(TT_SelectorName))) - return 0; - if (Current->is(TT_RangeBasedForLoopColon)) - return prec::Comma; - if ((Style.Language == FormatStyle::LK_Java || - Style.Language == FormatStyle::LK_JavaScript) && - Current->is(Keywords.kw_instanceof)) - return prec::Relational; - if (Style.Language == FormatStyle::LK_JavaScript && - Current->isOneOf(Keywords.kw_in, Keywords.kw_as)) - return prec::Relational; - if (Current->is(TT_BinaryOperator) || Current->is(tok::comma)) - return Current->getPrecedence(); - if (Current->isOneOf(tok::period, tok::arrow)) - return PrecedenceArrowAndPeriod; - if ((Style.Language == FormatStyle::LK_Java || - Style.Language == FormatStyle::LK_JavaScript) && - Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements, - Keywords.kw_throws)) - return 0; - } - return -1; - } - - void addFakeParenthesis(FormatToken *Start, prec::Level Precedence) { - Start->FakeLParens.push_back(Precedence); - if (Precedence > prec::Unknown) - Start->StartsBinaryExpression = true; - if (Current) { - FormatToken *Previous = Current->Previous; - while (Previous->is(tok::comment) && Previous->Previous) - Previous = Previous->Previous; - ++Previous->FakeRParens; - if (Precedence > prec::Unknown) - Previous->EndsBinaryExpression = true; - } - } - - /// Parse unary operator expressions and surround them with fake - /// parentheses if appropriate. - void parseUnaryOperator() { - llvm::SmallVector<FormatToken *, 2> Tokens; - while (Current && Current->is(TT_UnaryOperator)) { - Tokens.push_back(Current); - next(); - } - parse(PrecedenceArrowAndPeriod); - for (FormatToken *Token : llvm::reverse(Tokens)) - // The actual precedence doesn't matter. - addFakeParenthesis(Token, prec::Unknown); - } - - void parseConditionalExpr() { - while (Current && Current->isTrailingComment()) { - next(); - } - FormatToken *Start = Current; - parse(prec::LogicalOr); - if (!Current || !Current->is(tok::question)) - return; - next(); - parse(prec::Assignment); - if (!Current || Current->isNot(TT_ConditionalExpr)) - return; - next(); - parse(prec::Assignment); - addFakeParenthesis(Start, prec::Conditional); - } - - void next(bool SkipPastLeadingComments = true) { - if (Current) - Current = Current->Next; - while (Current && - (Current->NewlinesBefore == 0 || SkipPastLeadingComments) && - Current->isTrailingComment()) - Current = Current->Next; - } - - const FormatStyle &Style; - const AdditionalKeywords &Keywords; - FormatToken *Current; -}; - -} // end anonymous namespace - -void TokenAnnotator::setCommentLineLevels( - SmallVectorImpl<AnnotatedLine *> &Lines) { - const AnnotatedLine *NextNonCommentLine = nullptr; - for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(), - E = Lines.rend(); - I != E; ++I) { - bool CommentLine = true; - for (const FormatToken *Tok = (*I)->First; Tok; Tok = Tok->Next) { - if (!Tok->is(tok::comment)) { - CommentLine = false; - break; - } - } - - // If the comment is currently aligned with the line immediately following - // it, that's probably intentional and we should keep it. - if (NextNonCommentLine && CommentLine && - NextNonCommentLine->First->NewlinesBefore <= 1 && - NextNonCommentLine->First->OriginalColumn == - (*I)->First->OriginalColumn) { - // Align comments for preprocessor lines with the # in column 0. - // Otherwise, align with the next line. - (*I)->Level = (NextNonCommentLine->Type == LT_PreprocessorDirective || - NextNonCommentLine->Type == LT_ImportStatement) - ? 0 - : NextNonCommentLine->Level; - } else { - NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr; - } - - setCommentLineLevels((*I)->Children); - } -} - -static unsigned maxNestingDepth(const AnnotatedLine &Line) { - unsigned Result = 0; - for (const auto *Tok = Line.First; Tok != nullptr; Tok = Tok->Next) - Result = std::max(Result, Tok->NestingLevel); - return Result; -} - -void TokenAnnotator::annotate(AnnotatedLine &Line) { - for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(), - E = Line.Children.end(); - I != E; ++I) { - annotate(**I); - } - AnnotatingParser Parser(Style, Line, Keywords); - Line.Type = Parser.parseLine(); - - // With very deep nesting, ExpressionParser uses lots of stack and the - // formatting algorithm is very slow. We're not going to do a good job here - // anyway - it's probably generated code being formatted by mistake. - // Just skip the whole line. - if (maxNestingDepth(Line) > 50) - Line.Type = LT_Invalid; - - if (Line.Type == LT_Invalid) - return; - - ExpressionParser ExprParser(Style, Keywords, Line); - ExprParser.parse(); - - if (Line.startsWith(TT_ObjCMethodSpecifier)) - Line.Type = LT_ObjCMethodDecl; - else if (Line.startsWith(TT_ObjCDecl)) - Line.Type = LT_ObjCDecl; - else if (Line.startsWith(TT_ObjCProperty)) - Line.Type = LT_ObjCProperty; - - Line.First->SpacesRequiredBefore = 1; - Line.First->CanBreakBefore = Line.First->MustBreakBefore; -} - -// This function heuristically determines whether 'Current' starts the name of a -// function declaration. -static bool isFunctionDeclarationName(const FormatToken &Current, - const AnnotatedLine &Line) { - auto skipOperatorName = [](const FormatToken *Next) -> const FormatToken * { - for (; Next; Next = Next->Next) { - if (Next->is(TT_OverloadedOperatorLParen)) - return Next; - if (Next->is(TT_OverloadedOperator)) - continue; - if (Next->isOneOf(tok::kw_new, tok::kw_delete)) { - // For 'new[]' and 'delete[]'. - if (Next->Next && Next->Next->is(tok::l_square) && Next->Next->Next && - Next->Next->Next->is(tok::r_square)) - Next = Next->Next->Next; - continue; - } - - break; - } - return nullptr; - }; - - // Find parentheses of parameter list. - const FormatToken *Next = Current.Next; - if (Current.is(tok::kw_operator)) { - if (Current.Previous && Current.Previous->is(tok::coloncolon)) - return false; - Next = skipOperatorName(Next); - } else { - if (!Current.is(TT_StartOfName) || Current.NestingLevel != 0) - return false; - for (; Next; Next = Next->Next) { - if (Next->is(TT_TemplateOpener)) { - Next = Next->MatchingParen; - } else if (Next->is(tok::coloncolon)) { - Next = Next->Next; - if (!Next) - return false; - if (Next->is(tok::kw_operator)) { - Next = skipOperatorName(Next->Next); - break; - } - if (!Next->is(tok::identifier)) - return false; - } else if (Next->is(tok::l_paren)) { - break; - } else { - return false; - } - } - } - - // Check whether parameter list can belong to a function declaration. - if (!Next || !Next->is(tok::l_paren) || !Next->MatchingParen) - return false; - // If the lines ends with "{", this is likely an function definition. - if (Line.Last->is(tok::l_brace)) - return true; - if (Next->Next == Next->MatchingParen) - return true; // Empty parentheses. - // If there is an &/&& after the r_paren, this is likely a function. - if (Next->MatchingParen->Next && - Next->MatchingParen->Next->is(TT_PointerOrReference)) - return true; - for (const FormatToken *Tok = Next->Next; Tok && Tok != Next->MatchingParen; - Tok = Tok->Next) { - if (Tok->is(tok::l_paren) && Tok->MatchingParen) { - Tok = Tok->MatchingParen; - continue; - } - if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() || - Tok->isOneOf(TT_PointerOrReference, TT_StartOfName, tok::ellipsis)) - return true; - if (Tok->isOneOf(tok::l_brace, tok::string_literal, TT_ObjCMethodExpr) || - Tok->Tok.isLiteral()) - return false; - } - return false; -} - -bool TokenAnnotator::mustBreakForReturnType(const AnnotatedLine &Line) const { - assert(Line.MightBeFunctionDecl); - - if ((Style.AlwaysBreakAfterReturnType == FormatStyle::RTBS_TopLevel || - Style.AlwaysBreakAfterReturnType == - FormatStyle::RTBS_TopLevelDefinitions) && - Line.Level > 0) - return false; - - switch (Style.AlwaysBreakAfterReturnType) { - case FormatStyle::RTBS_None: - return false; - case FormatStyle::RTBS_All: - case FormatStyle::RTBS_TopLevel: - return true; - case FormatStyle::RTBS_AllDefinitions: - case FormatStyle::RTBS_TopLevelDefinitions: - return Line.mightBeFunctionDefinition(); - } - - return false; -} - -void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { - for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(), - E = Line.Children.end(); - I != E; ++I) { - calculateFormattingInformation(**I); - } - - Line.First->TotalLength = - Line.First->IsMultiline ? Style.ColumnLimit - : Line.FirstStartColumn + Line.First->ColumnWidth; - FormatToken *Current = Line.First->Next; - bool InFunctionDecl = Line.MightBeFunctionDecl; - while (Current) { - if (isFunctionDeclarationName(*Current, Line)) - Current->Type = TT_FunctionDeclarationName; - if (Current->is(TT_LineComment)) { - if (Current->Previous->BlockKind == BK_BracedInit && - Current->Previous->opensScope()) - Current->SpacesRequiredBefore = Style.Cpp11BracedListStyle ? 0 : 1; - else - Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments; - - // If we find a trailing comment, iterate backwards to determine whether - // it seems to relate to a specific parameter. If so, break before that - // parameter to avoid changing the comment's meaning. E.g. don't move 'b' - // to the previous line in: - // SomeFunction(a, - // b, // comment - // c); - if (!Current->HasUnescapedNewline) { - for (FormatToken *Parameter = Current->Previous; Parameter; - Parameter = Parameter->Previous) { - if (Parameter->isOneOf(tok::comment, tok::r_brace)) - break; - if (Parameter->Previous && Parameter->Previous->is(tok::comma)) { - if (!Parameter->Previous->is(TT_CtorInitializerComma) && - Parameter->HasUnescapedNewline) - Parameter->MustBreakBefore = true; - break; - } - } - } - } else if (Current->SpacesRequiredBefore == 0 && - spaceRequiredBefore(Line, *Current)) { - Current->SpacesRequiredBefore = 1; - } - - Current->MustBreakBefore = - Current->MustBreakBefore || mustBreakBefore(Line, *Current); - - if (!Current->MustBreakBefore && InFunctionDecl && - Current->is(TT_FunctionDeclarationName)) - Current->MustBreakBefore = mustBreakForReturnType(Line); - - Current->CanBreakBefore = - Current->MustBreakBefore || canBreakBefore(Line, *Current); - unsigned ChildSize = 0; - if (Current->Previous->Children.size() == 1) { - FormatToken &LastOfChild = *Current->Previous->Children[0]->Last; - ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit - : LastOfChild.TotalLength + 1; - } - const FormatToken *Prev = Current->Previous; - if (Current->MustBreakBefore || Prev->Children.size() > 1 || - (Prev->Children.size() == 1 && - Prev->Children[0]->First->MustBreakBefore) || - Current->IsMultiline) - Current->TotalLength = Prev->TotalLength + Style.ColumnLimit; - else - Current->TotalLength = Prev->TotalLength + Current->ColumnWidth + - ChildSize + Current->SpacesRequiredBefore; - - if (Current->is(TT_CtorInitializerColon)) - InFunctionDecl = false; - - // FIXME: Only calculate this if CanBreakBefore is true once static - // initializers etc. are sorted out. - // FIXME: Move magic numbers to a better place. - - // Reduce penalty for aligning ObjC method arguments using the colon - // alignment as this is the canonical way (still prefer fitting everything - // into one line if possible). Trying to fit a whole expression into one - // line should not force other line breaks (e.g. when ObjC method - // expression is a part of other expression). - Current->SplitPenalty = splitPenalty(Line, *Current, InFunctionDecl); - if (Style.Language == FormatStyle::LK_ObjC && - Current->is(TT_SelectorName) && Current->ParameterIndex > 0) { - if (Current->ParameterIndex == 1) - Current->SplitPenalty += 5 * Current->BindingStrength; - } else { - Current->SplitPenalty += 20 * Current->BindingStrength; - } - - Current = Current->Next; - } - - calculateUnbreakableTailLengths(Line); - unsigned IndentLevel = Line.Level; - for (Current = Line.First; Current != nullptr; Current = Current->Next) { - if (Current->Role) - Current->Role->precomputeFormattingInfos(Current); - if (Current->MatchingParen && - Current->MatchingParen->opensBlockOrBlockTypeList(Style)) { - assert(IndentLevel > 0); - --IndentLevel; - } - Current->IndentLevel = IndentLevel; - if (Current->opensBlockOrBlockTypeList(Style)) - ++IndentLevel; - } - - LLVM_DEBUG({ printDebugInfo(Line); }); -} - -void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) { - unsigned UnbreakableTailLength = 0; - FormatToken *Current = Line.Last; - while (Current) { - Current->UnbreakableTailLength = UnbreakableTailLength; - if (Current->CanBreakBefore || - Current->isOneOf(tok::comment, tok::string_literal)) { - UnbreakableTailLength = 0; - } else { - UnbreakableTailLength += - Current->ColumnWidth + Current->SpacesRequiredBefore; - } - Current = Current->Previous; - } -} - -unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, - const FormatToken &Tok, - bool InFunctionDecl) { - const FormatToken &Left = *Tok.Previous; - const FormatToken &Right = Tok; - - if (Left.is(tok::semi)) - return 0; - - if (Style.Language == FormatStyle::LK_Java) { - if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_throws)) - return 1; - if (Right.is(Keywords.kw_implements)) - return 2; - if (Left.is(tok::comma) && Left.NestingLevel == 0) - return 3; - } else if (Style.Language == FormatStyle::LK_JavaScript) { - if (Right.is(Keywords.kw_function) && Left.isNot(tok::comma)) - return 100; - if (Left.is(TT_JsTypeColon)) - return 35; - if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) || - (Right.is(TT_TemplateString) && Right.TokenText.startswith("}"))) - return 100; - // Prefer breaking call chains (".foo") over empty "{}", "[]" or "()". - if (Left.opensScope() && Right.closesScope()) - return 200; - } - - if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral)) - return 1; - if (Right.is(tok::l_square)) { - if (Style.Language == FormatStyle::LK_Proto) - return 1; - if (Left.is(tok::r_square)) - return 200; - // Slightly prefer formatting local lambda definitions like functions. - if (Right.is(TT_LambdaLSquare) && Left.is(tok::equal)) - return 35; - if (!Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare, - TT_ArrayInitializerLSquare, - TT_DesignatedInitializerLSquare, TT_AttributeSquare)) - return 500; - } - - if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) || - Right.is(tok::kw_operator)) { - if (Line.startsWith(tok::kw_for) && Right.PartOfMultiVariableDeclStmt) - return 3; - if (Left.is(TT_StartOfName)) - return 110; - if (InFunctionDecl && Right.NestingLevel == 0) - return Style.PenaltyReturnTypeOnItsOwnLine; - return 200; - } - if (Right.is(TT_PointerOrReference)) - return 190; - if (Right.is(TT_LambdaArrow)) - return 110; - if (Left.is(tok::equal) && Right.is(tok::l_brace)) - return 160; - if (Left.is(TT_CastRParen)) - return 100; - if (Left.is(tok::coloncolon) || - (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto)) - return 500; - if (Left.isOneOf(tok::kw_class, tok::kw_struct)) - return 5000; - if (Left.is(tok::comment)) - return 1000; - - if (Left.isOneOf(TT_RangeBasedForLoopColon, TT_InheritanceColon, - TT_CtorInitializerColon)) - return 2; - - if (Right.isMemberAccess()) { - // Breaking before the "./->" of a chained call/member access is reasonably - // cheap, as formatting those with one call per line is generally - // desirable. In particular, it should be cheaper to break before the call - // than it is to break inside a call's parameters, which could lead to weird - // "hanging" indents. The exception is the very last "./->" to support this - // frequent pattern: - // - // aaaaaaaa.aaaaaaaa.bbbbbbb().ccccccccccccccccccccc( - // dddddddd); - // - // which might otherwise be blown up onto many lines. Here, clang-format - // won't produce "hanging" indents anyway as there is no other trailing - // call. - // - // Also apply higher penalty is not a call as that might lead to a wrapping - // like: - // - // aaaaaaa - // .aaaaaaaaa.bbbbbbbb(cccccccc); - return !Right.NextOperator || !Right.NextOperator->Previous->closesScope() - ? 150 - : 35; - } - - if (Right.is(TT_TrailingAnnotation) && - (!Right.Next || Right.Next->isNot(tok::l_paren))) { - // Moving trailing annotations to the next line is fine for ObjC method - // declarations. - if (Line.startsWith(TT_ObjCMethodSpecifier)) - return 10; - // Generally, breaking before a trailing annotation is bad unless it is - // function-like. It seems to be especially preferable to keep standard - // annotations (i.e. "const", "final" and "override") on the same line. - // Use a slightly higher penalty after ")" so that annotations like - // "const override" are kept together. - bool is_short_annotation = Right.TokenText.size() < 10; - return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0); - } - - // In for-loops, prefer breaking at ',' and ';'. - if (Line.startsWith(tok::kw_for) && Left.is(tok::equal)) - return 4; - - // In Objective-C method expressions, prefer breaking before "param:" over - // breaking after it. - if (Right.is(TT_SelectorName)) - return 0; - if (Left.is(tok::colon) && Left.is(TT_ObjCMethodExpr)) - return Line.MightBeFunctionDecl ? 50 : 500; - - // In Objective-C type declarations, avoid breaking after the category's - // open paren (we'll prefer breaking after the protocol list's opening - // angle bracket, if present). - if (Line.Type == LT_ObjCDecl && Left.is(tok::l_paren) && Left.Previous && - Left.Previous->isOneOf(tok::identifier, tok::greater)) - return 500; - - if (Left.is(tok::l_paren) && InFunctionDecl && - Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) - return 100; - if (Left.is(tok::l_paren) && Left.Previous && - (Left.Previous->isOneOf(tok::kw_if, tok::kw_for) || - Left.Previous->endsSequence(tok::kw_constexpr, tok::kw_if))) - return 1000; - if (Left.is(tok::equal) && InFunctionDecl) - return 110; - if (Right.is(tok::r_brace)) - return 1; - if (Left.is(TT_TemplateOpener)) - return 100; - if (Left.opensScope()) { - if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign) - return 0; - if (Left.is(tok::l_brace) && !Style.Cpp11BracedListStyle) - return 19; - return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter - : 19; - } - if (Left.is(TT_JavaAnnotation)) - return 50; - - if (Left.isOneOf(tok::plus, tok::comma) && Left.Previous && - Left.Previous->isLabelString() && - (Left.NextOperator || Left.OperatorIndex != 0)) - return 50; - if (Right.is(tok::plus) && Left.isLabelString() && - (Right.NextOperator || Right.OperatorIndex != 0)) - return 25; - if (Left.is(tok::comma)) - return 1; - if (Right.is(tok::lessless) && Left.isLabelString() && - (Right.NextOperator || Right.OperatorIndex != 1)) - return 25; - if (Right.is(tok::lessless)) { - // Breaking at a << is really cheap. - if (!Left.is(tok::r_paren) || Right.OperatorIndex > 0) - // Slightly prefer to break before the first one in log-like statements. - return 2; - return 1; - } - if (Left.ClosesTemplateDeclaration) - return Style.PenaltyBreakTemplateDeclaration; - if (Left.is(TT_ConditionalExpr)) - return prec::Conditional; - prec::Level Level = Left.getPrecedence(); - if (Level == prec::Unknown) - Level = Right.getPrecedence(); - if (Level == prec::Assignment) - return Style.PenaltyBreakAssignment; - if (Level != prec::Unknown) - return Level; - - return 3; -} - -bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, - const FormatToken &Left, - const FormatToken &Right) { - if (Left.is(tok::kw_return) && Right.isNot(tok::semi)) - return true; - if (Left.is(Keywords.kw_assert) && Style.Language == FormatStyle::LK_Java) - return true; - if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty && - Left.Tok.getObjCKeywordID() == tok::objc_property) - return true; - if (Right.is(tok::hashhash)) - return Left.is(tok::hash); - if (Left.isOneOf(tok::hashhash, tok::hash)) - return Right.is(tok::hash); - if (Left.is(tok::l_paren) && Right.is(tok::r_paren)) - return Style.SpaceInEmptyParentheses; - if (Left.is(tok::l_paren) || Right.is(tok::r_paren)) - return (Right.is(TT_CastRParen) || - (Left.MatchingParen && Left.MatchingParen->is(TT_CastRParen))) - ? Style.SpacesInCStyleCastParentheses - : Style.SpacesInParentheses; - if (Right.isOneOf(tok::semi, tok::comma)) - return false; - if (Right.is(tok::less) && Line.Type == LT_ObjCDecl) { - bool IsLightweightGeneric = - Right.MatchingParen && Right.MatchingParen->Next && - Right.MatchingParen->Next->is(tok::colon); - return !IsLightweightGeneric && Style.ObjCSpaceBeforeProtocolList; - } - if (Right.is(tok::less) && Left.is(tok::kw_template)) - return Style.SpaceAfterTemplateKeyword; - if (Left.isOneOf(tok::exclaim, tok::tilde)) - return false; - if (Left.is(tok::at) && - Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant, - tok::numeric_constant, tok::l_paren, tok::l_brace, - tok::kw_true, tok::kw_false)) - return false; - if (Left.is(tok::colon)) - return !Left.is(TT_ObjCMethodExpr); - if (Left.is(tok::coloncolon)) - return false; - if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) { - if (Style.Language == FormatStyle::LK_TextProto || - (Style.Language == FormatStyle::LK_Proto && - (Left.is(TT_DictLiteral) || Right.is(TT_DictLiteral)))) { - // Format empty list as `<>`. - if (Left.is(tok::less) && Right.is(tok::greater)) - return false; - return !Style.Cpp11BracedListStyle; - } - return false; - } - if (Right.is(tok::ellipsis)) - return Left.Tok.isLiteral() || (Left.is(tok::identifier) && Left.Previous && - Left.Previous->is(tok::kw_case)); - if (Left.is(tok::l_square) && Right.is(tok::amp)) - return false; - if (Right.is(TT_PointerOrReference)) { - if (Left.is(tok::r_paren) && Line.MightBeFunctionDecl) { - if (!Left.MatchingParen) - return true; - FormatToken *TokenBeforeMatchingParen = - Left.MatchingParen->getPreviousNonComment(); - if (!TokenBeforeMatchingParen || - !TokenBeforeMatchingParen->isOneOf(tok::kw_typeof, tok::kw_decltype)) - return true; - } - return (Left.Tok.isLiteral() || - (!Left.isOneOf(TT_PointerOrReference, tok::l_paren) && - (Style.PointerAlignment != FormatStyle::PAS_Left || - (Line.IsMultiVariableDeclStmt && - (Left.NestingLevel == 0 || - (Left.NestingLevel == 1 && Line.First->is(tok::kw_for))))))); - } - if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) && - (!Left.is(TT_PointerOrReference) || - (Style.PointerAlignment != FormatStyle::PAS_Right && - !Line.IsMultiVariableDeclStmt))) - return true; - if (Left.is(TT_PointerOrReference)) - return Right.Tok.isLiteral() || Right.is(TT_BlockComment) || - (Right.isOneOf(Keywords.kw_override, Keywords.kw_final) && - !Right.is(TT_StartOfName)) || - (Right.is(tok::l_brace) && Right.BlockKind == BK_Block) || - (!Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare, - tok::l_paren) && - (Style.PointerAlignment != FormatStyle::PAS_Right && - !Line.IsMultiVariableDeclStmt) && - Left.Previous && - !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon)); - if (Right.is(tok::star) && Left.is(tok::l_paren)) - return false; - const auto SpaceRequiredForArrayInitializerLSquare = - [](const FormatToken &LSquareTok, const FormatStyle &Style) { - return Style.SpacesInContainerLiterals || - ((Style.Language == FormatStyle::LK_Proto || - Style.Language == FormatStyle::LK_TextProto) && - !Style.Cpp11BracedListStyle && - LSquareTok.endsSequence(tok::l_square, tok::colon, - TT_SelectorName)); - }; - if (Left.is(tok::l_square)) - return (Left.is(TT_ArrayInitializerLSquare) && Right.isNot(tok::r_square) && - SpaceRequiredForArrayInitializerLSquare(Left, Style)) || - (Left.isOneOf(TT_ArraySubscriptLSquare, - TT_StructuredBindingLSquare) && - Style.SpacesInSquareBrackets && Right.isNot(tok::r_square)); - if (Right.is(tok::r_square)) - return Right.MatchingParen && - ((Right.MatchingParen->is(TT_ArrayInitializerLSquare) && - SpaceRequiredForArrayInitializerLSquare(*Right.MatchingParen, - Style)) || - (Style.SpacesInSquareBrackets && - Right.MatchingParen->isOneOf(TT_ArraySubscriptLSquare, - TT_StructuredBindingLSquare)) || - Right.MatchingParen->is(TT_AttributeParen)); - if (Right.is(tok::l_square) && - !Right.isOneOf(TT_ObjCMethodExpr, TT_LambdaLSquare, - TT_DesignatedInitializerLSquare, - TT_StructuredBindingLSquare, TT_AttributeSquare) && - !Left.isOneOf(tok::numeric_constant, TT_DictLiteral)) - return false; - if (Left.is(tok::l_brace) && Right.is(tok::r_brace)) - return !Left.Children.empty(); // No spaces in "{}". - if ((Left.is(tok::l_brace) && Left.BlockKind != BK_Block) || - (Right.is(tok::r_brace) && Right.MatchingParen && - Right.MatchingParen->BlockKind != BK_Block)) - return !Style.Cpp11BracedListStyle; - if (Left.is(TT_BlockComment)) - // No whitespace in x(/*foo=*/1), except for JavaScript. - return Style.Language == FormatStyle::LK_JavaScript || - !Left.TokenText.endswith("=*/"); - if (Right.is(tok::l_paren)) { - if ((Left.is(tok::r_paren) && Left.is(TT_AttributeParen)) || - (Left.is(tok::r_square) && Left.is(TT_AttributeSquare))) - return true; - return Line.Type == LT_ObjCDecl || Left.is(tok::semi) || - (Style.SpaceBeforeParens != FormatStyle::SBPO_Never && - (Left.isOneOf(tok::kw_if, tok::pp_elif, tok::kw_for, tok::kw_while, - tok::kw_switch, tok::kw_case, TT_ForEachMacro, - TT_ObjCForIn) || - Left.endsSequence(tok::kw_constexpr, tok::kw_if) || - (Left.isOneOf(tok::kw_try, Keywords.kw___except, tok::kw_catch, - tok::kw_new, tok::kw_delete) && - (!Left.Previous || Left.Previous->isNot(tok::period))))) || - (Style.SpaceBeforeParens == FormatStyle::SBPO_Always && - (Left.is(tok::identifier) || Left.isFunctionLikeKeyword() || - Left.is(tok::r_paren)) && - Line.Type != LT_PreprocessorDirective); - } - if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword) - return false; - if (Right.is(TT_UnaryOperator)) - return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) && - (Left.isNot(tok::colon) || Left.isNot(TT_ObjCMethodExpr)); - if ((Left.isOneOf(tok::identifier, tok::greater, tok::r_square, - tok::r_paren) || - Left.isSimpleTypeSpecifier()) && - Right.is(tok::l_brace) && Right.getNextNonComment() && - Right.BlockKind != BK_Block) - return false; - if (Left.is(tok::period) || Right.is(tok::period)) - return false; - if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L") - return false; - if (Left.is(TT_TemplateCloser) && Left.MatchingParen && - Left.MatchingParen->Previous && - (Left.MatchingParen->Previous->is(tok::period) || - Left.MatchingParen->Previous->is(tok::coloncolon))) - // Java call to generic function with explicit type: - // A.<B<C<...>>>DoSomething(); - // A::<B<C<...>>>DoSomething(); // With a Java 8 method reference. - return false; - if (Left.is(TT_TemplateCloser) && Right.is(tok::l_square)) - return false; - if (Left.is(tok::l_brace) && Left.endsSequence(TT_DictLiteral, tok::at)) - // Objective-C dictionary literal -> no space after opening brace. - return false; - if (Right.is(tok::r_brace) && Right.MatchingParen && - Right.MatchingParen->endsSequence(TT_DictLiteral, tok::at)) - // Objective-C dictionary literal -> no space before closing brace. - return false; - return true; -} - -bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, - const FormatToken &Right) { - const FormatToken &Left = *Right.Previous; - if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo()) - return true; // Never ever merge two identifiers. - if (Style.isCpp()) { - if (Left.is(tok::kw_operator)) - return Right.is(tok::coloncolon); - if (Right.is(tok::l_brace) && Right.BlockKind == BK_BracedInit && - !Left.opensScope() && Style.SpaceBeforeCpp11BracedList) - return true; - } else if (Style.Language == FormatStyle::LK_Proto || - Style.Language == FormatStyle::LK_TextProto) { - if (Right.is(tok::period) && - Left.isOneOf(Keywords.kw_optional, Keywords.kw_required, - Keywords.kw_repeated, Keywords.kw_extend)) - return true; - if (Right.is(tok::l_paren) && - Left.isOneOf(Keywords.kw_returns, Keywords.kw_option)) - return true; - if (Right.isOneOf(tok::l_brace, tok::less) && Left.is(TT_SelectorName)) - return true; - // Slashes occur in text protocol extension syntax: [type/type] { ... }. - if (Left.is(tok::slash) || Right.is(tok::slash)) - return false; - if (Left.MatchingParen && Left.MatchingParen->is(TT_ProtoExtensionLSquare) && - Right.isOneOf(tok::l_brace, tok::less)) - return !Style.Cpp11BracedListStyle; - // A percent is probably part of a formatting specification, such as %lld. - if (Left.is(tok::percent)) - return false; - // Preserve the existence of a space before a percent for cases like 0x%04x - // and "%d %d" - if (Left.is(tok::numeric_constant) && Right.is(tok::percent)) - return Right.WhitespaceRange.getEnd() != Right.WhitespaceRange.getBegin(); - } else if (Style.Language == FormatStyle::LK_JavaScript) { - if (Left.is(TT_JsFatArrow)) - return true; - // for await ( ... - if (Right.is(tok::l_paren) && Left.is(Keywords.kw_await) && Left.Previous && - Left.Previous->is(tok::kw_for)) - return true; - if (Left.is(Keywords.kw_async) && Right.is(tok::l_paren) && - Right.MatchingParen) { - const FormatToken *Next = Right.MatchingParen->getNextNonComment(); - // An async arrow function, for example: `x = async () => foo();`, - // as opposed to calling a function called async: `x = async();` - if (Next && Next->is(TT_JsFatArrow)) - return true; - } - if ((Left.is(TT_TemplateString) && Left.TokenText.endswith("${")) || - (Right.is(TT_TemplateString) && Right.TokenText.startswith("}"))) - return false; - // In tagged template literals ("html`bar baz`"), there is no space between - // the tag identifier and the template string. getIdentifierInfo makes sure - // that the identifier is not a pseudo keyword like `yield`, either. - if (Left.is(tok::identifier) && Keywords.IsJavaScriptIdentifier(Left) && - Right.is(TT_TemplateString)) - return false; - if (Right.is(tok::star) && - Left.isOneOf(Keywords.kw_function, Keywords.kw_yield)) - return false; - if (Right.isOneOf(tok::l_brace, tok::l_square) && - Left.isOneOf(Keywords.kw_function, Keywords.kw_yield, - Keywords.kw_extends, Keywords.kw_implements)) - return true; - if (Right.is(tok::l_paren)) { - // JS methods can use some keywords as names (e.g. `delete()`). - if (Line.MustBeDeclaration && Left.Tok.getIdentifierInfo()) - return false; - // Valid JS method names can include keywords, e.g. `foo.delete()` or - // `bar.instanceof()`. Recognize call positions by preceding period. - if (Left.Previous && Left.Previous->is(tok::period) && - Left.Tok.getIdentifierInfo()) - return false; - // Additional unary JavaScript operators that need a space after. - if (Left.isOneOf(tok::kw_throw, Keywords.kw_await, Keywords.kw_typeof, - tok::kw_void)) - return true; - } - if ((Left.isOneOf(Keywords.kw_let, Keywords.kw_var, Keywords.kw_in, - tok::kw_const) || - // "of" is only a keyword if it appears after another identifier - // (e.g. as "const x of y" in a for loop), or after a destructuring - // operation (const [x, y] of z, const {a, b} of c). - (Left.is(Keywords.kw_of) && Left.Previous && - (Left.Previous->Tok.is(tok::identifier) || - Left.Previous->isOneOf(tok::r_square, tok::r_brace)))) && - (!Left.Previous || !Left.Previous->is(tok::period))) - return true; - if (Left.isOneOf(tok::kw_for, Keywords.kw_as) && Left.Previous && - Left.Previous->is(tok::period) && Right.is(tok::l_paren)) - return false; - if (Left.is(Keywords.kw_as) && - Right.isOneOf(tok::l_square, tok::l_brace, tok::l_paren)) - return true; - if (Left.is(tok::kw_default) && Left.Previous && - Left.Previous->is(tok::kw_export)) - return true; - if (Left.is(Keywords.kw_is) && Right.is(tok::l_brace)) - return true; - if (Right.isOneOf(TT_JsTypeColon, TT_JsTypeOptionalQuestion)) - return false; - if (Left.is(TT_JsTypeOperator) || Right.is(TT_JsTypeOperator)) - return false; - if ((Left.is(tok::l_brace) || Right.is(tok::r_brace)) && - Line.First->isOneOf(Keywords.kw_import, tok::kw_export)) - return false; - if (Left.is(tok::ellipsis)) - return false; - if (Left.is(TT_TemplateCloser) && - !Right.isOneOf(tok::equal, tok::l_brace, tok::comma, tok::l_square, - Keywords.kw_implements, Keywords.kw_extends)) - // Type assertions ('<type>expr') are not followed by whitespace. Other - // locations that should have whitespace following are identified by the - // above set of follower tokens. - return false; - if (Right.is(TT_JsNonNullAssertion)) - return false; - if (Left.is(TT_JsNonNullAssertion) && - Right.isOneOf(Keywords.kw_as, Keywords.kw_in)) - return true; // "x! as string", "x! in y" - } else if (Style.Language == FormatStyle::LK_Java) { - if (Left.is(tok::r_square) && Right.is(tok::l_brace)) - return true; - if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren)) - return Style.SpaceBeforeParens != FormatStyle::SBPO_Never; - if ((Left.isOneOf(tok::kw_static, tok::kw_public, tok::kw_private, - tok::kw_protected) || - Left.isOneOf(Keywords.kw_final, Keywords.kw_abstract, - Keywords.kw_native)) && - Right.is(TT_TemplateOpener)) - return true; - } - if (Left.is(TT_ImplicitStringLiteral)) - return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd(); - if (Line.Type == LT_ObjCMethodDecl) { - if (Left.is(TT_ObjCMethodSpecifier)) - return true; - if (Left.is(tok::r_paren) && canBeObjCSelectorComponent(Right)) - // Don't space between ')' and <id> or ')' and 'new'. 'new' is not a - // keyword in Objective-C, and '+ (instancetype)new;' is a standard class - // method declaration. - return false; - } - if (Line.Type == LT_ObjCProperty && - (Right.is(tok::equal) || Left.is(tok::equal))) - return false; - - if (Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow) || - Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow)) - return true; - if (Right.is(TT_OverloadedOperatorLParen)) - return Style.SpaceBeforeParens == FormatStyle::SBPO_Always; - if (Left.is(tok::comma)) - return true; - if (Right.is(tok::comma)) - return false; - if (Right.is(TT_ObjCBlockLParen)) - return true; - if (Right.is(TT_CtorInitializerColon)) - return Style.SpaceBeforeCtorInitializerColon; - if (Right.is(TT_InheritanceColon) && !Style.SpaceBeforeInheritanceColon) - return false; - if (Right.is(TT_RangeBasedForLoopColon) && - !Style.SpaceBeforeRangeBasedForLoopColon) - return false; - if (Right.is(tok::colon)) { - if (Line.First->isOneOf(tok::kw_case, tok::kw_default) || - !Right.getNextNonComment() || Right.getNextNonComment()->is(tok::semi)) - return false; - if (Right.is(TT_ObjCMethodExpr)) - return false; - if (Left.is(tok::question)) - return false; - if (Right.is(TT_InlineASMColon) && Left.is(tok::coloncolon)) - return false; - if (Right.is(TT_DictLiteral)) - return Style.SpacesInContainerLiterals; - if (Right.is(TT_AttributeColon)) - return false; - return true; - } - if (Left.is(TT_UnaryOperator)) - return Right.is(TT_BinaryOperator); - - // If the next token is a binary operator or a selector name, we have - // incorrectly classified the parenthesis as a cast. FIXME: Detect correctly. - if (Left.is(TT_CastRParen)) - return Style.SpaceAfterCStyleCast || - Right.isOneOf(TT_BinaryOperator, TT_SelectorName); - - if (Left.is(tok::greater) && Right.is(tok::greater)) { - if (Style.Language == FormatStyle::LK_TextProto || - (Style.Language == FormatStyle::LK_Proto && Left.is(TT_DictLiteral))) - return !Style.Cpp11BracedListStyle; - return Right.is(TT_TemplateCloser) && Left.is(TT_TemplateCloser) && - (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles); - } - if (Right.isOneOf(tok::arrow, tok::arrowstar, tok::periodstar) || - Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) || - (Right.is(tok::period) && Right.isNot(TT_DesignatedInitializerPeriod))) - return false; - if (!Style.SpaceBeforeAssignmentOperators && - Right.getPrecedence() == prec::Assignment) - return false; - if (Style.Language == FormatStyle::LK_Java && Right.is(tok::coloncolon) && - (Left.is(tok::identifier) || Left.is(tok::kw_this))) - return false; - if (Right.is(tok::coloncolon) && Left.is(tok::identifier)) - // Generally don't remove existing spaces between an identifier and "::". - // The identifier might actually be a macro name such as ALWAYS_INLINE. If - // this turns out to be too lenient, add analysis of the identifier itself. - return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd(); - if (Right.is(tok::coloncolon) && !Left.isOneOf(tok::l_brace, tok::comment)) - return (Left.is(TT_TemplateOpener) && - Style.Standard == FormatStyle::LS_Cpp03) || - !(Left.isOneOf(tok::l_paren, tok::r_paren, tok::l_square, - tok::kw___super, TT_TemplateCloser, - TT_TemplateOpener)) || - (Left.is(tok ::l_paren) && Style.SpacesInParentheses); - if ((Left.is(TT_TemplateOpener)) != (Right.is(TT_TemplateCloser))) - return Style.SpacesInAngles; - // Space before TT_StructuredBindingLSquare. - if (Right.is(TT_StructuredBindingLSquare)) - return !Left.isOneOf(tok::amp, tok::ampamp) || - Style.PointerAlignment != FormatStyle::PAS_Right; - // Space before & or && following a TT_StructuredBindingLSquare. - if (Right.Next && Right.Next->is(TT_StructuredBindingLSquare) && - Right.isOneOf(tok::amp, tok::ampamp)) - return Style.PointerAlignment != FormatStyle::PAS_Left; - if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) || - (Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) && - !Right.is(tok::r_paren))) - return true; - if (Left.is(TT_TemplateCloser) && Right.is(tok::l_paren) && - Right.isNot(TT_FunctionTypeLParen)) - return Style.SpaceBeforeParens == FormatStyle::SBPO_Always; - if (Right.is(TT_TemplateOpener) && Left.is(tok::r_paren) && - Left.MatchingParen && Left.MatchingParen->is(TT_OverloadedOperatorLParen)) - return false; - if (Right.is(tok::less) && Left.isNot(tok::l_paren) && - Line.startsWith(tok::hash)) - return true; - if (Right.is(TT_TrailingUnaryOperator)) - return false; - if (Left.is(TT_RegexLiteral)) - return false; - return spaceRequiredBetween(Line, Left, Right); -} - -// Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style. -static bool isAllmanBrace(const FormatToken &Tok) { - return Tok.is(tok::l_brace) && Tok.BlockKind == BK_Block && - !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral); -} - -bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, - const FormatToken &Right) { - const FormatToken &Left = *Right.Previous; - if (Right.NewlinesBefore > 1 && Style.MaxEmptyLinesToKeep > 0) - return true; - - if (Style.Language == FormatStyle::LK_JavaScript) { - // FIXME: This might apply to other languages and token kinds. - if (Right.is(tok::string_literal) && Left.is(tok::plus) && Left.Previous && - Left.Previous->is(tok::string_literal)) - return true; - if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace) && Line.Level == 0 && - Left.Previous && Left.Previous->is(tok::equal) && - Line.First->isOneOf(tok::identifier, Keywords.kw_import, tok::kw_export, - tok::kw_const) && - // kw_var/kw_let are pseudo-tokens that are tok::identifier, so match - // above. - !Line.First->isOneOf(Keywords.kw_var, Keywords.kw_let)) - // Object literals on the top level of a file are treated as "enum-style". - // Each key/value pair is put on a separate line, instead of bin-packing. - return true; - if (Left.is(tok::l_brace) && Line.Level == 0 && - (Line.startsWith(tok::kw_enum) || - Line.startsWith(tok::kw_const, tok::kw_enum) || - Line.startsWith(tok::kw_export, tok::kw_enum) || - Line.startsWith(tok::kw_export, tok::kw_const, tok::kw_enum))) - // JavaScript top-level enum key/value pairs are put on separate lines - // instead of bin-packing. - return true; - if (Right.is(tok::r_brace) && Left.is(tok::l_brace) && - !Left.Children.empty()) - // Support AllowShortFunctionsOnASingleLine for JavaScript. - return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None || - Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Empty || - (Left.NestingLevel == 0 && Line.Level == 0 && - Style.AllowShortFunctionsOnASingleLine & - FormatStyle::SFS_InlineOnly); - } else if (Style.Language == FormatStyle::LK_Java) { - if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next && - Right.Next->is(tok::string_literal)) - return true; - } else if (Style.Language == FormatStyle::LK_Cpp || - Style.Language == FormatStyle::LK_ObjC || - Style.Language == FormatStyle::LK_Proto || - Style.Language == FormatStyle::LK_TableGen || - Style.Language == FormatStyle::LK_TextProto) { - if (Left.isStringLiteral() && Right.isStringLiteral()) - return true; - } - - // If the last token before a '}', ']', or ')' is a comma or a trailing - // comment, the intention is to insert a line break after it in order to make - // shuffling around entries easier. Import statements, especially in - // JavaScript, can be an exception to this rule. - if (Style.JavaScriptWrapImports || Line.Type != LT_ImportStatement) { - const FormatToken *BeforeClosingBrace = nullptr; - if ((Left.isOneOf(tok::l_brace, TT_ArrayInitializerLSquare) || - (Style.Language == FormatStyle::LK_JavaScript && - Left.is(tok::l_paren))) && - Left.BlockKind != BK_Block && Left.MatchingParen) - BeforeClosingBrace = Left.MatchingParen->Previous; - else if (Right.MatchingParen && - (Right.MatchingParen->isOneOf(tok::l_brace, - TT_ArrayInitializerLSquare) || - (Style.Language == FormatStyle::LK_JavaScript && - Right.MatchingParen->is(tok::l_paren)))) - BeforeClosingBrace = &Left; - if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) || - BeforeClosingBrace->isTrailingComment())) - return true; - } - - if (Right.is(tok::comment)) - return Left.BlockKind != BK_BracedInit && - Left.isNot(TT_CtorInitializerColon) && - (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline); - if (Left.isTrailingComment()) - return true; - if (Right.Previous->IsUnterminatedLiteral) - return true; - if (Right.is(tok::lessless) && Right.Next && - Right.Previous->is(tok::string_literal) && - Right.Next->is(tok::string_literal)) - return true; - if (Right.Previous->ClosesTemplateDeclaration && - Right.Previous->MatchingParen && - Right.Previous->MatchingParen->NestingLevel == 0 && - Style.AlwaysBreakTemplateDeclarations == FormatStyle::BTDS_Yes) - return true; - if (Right.is(TT_CtorInitializerComma) && - Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma && - !Style.ConstructorInitializerAllOnOneLineOrOnePerLine) - return true; - if (Right.is(TT_CtorInitializerColon) && - Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma && - !Style.ConstructorInitializerAllOnOneLineOrOnePerLine) - return true; - // Break only if we have multiple inheritance. - if (Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma && - Right.is(TT_InheritanceComma)) - return true; - if (Right.is(tok::string_literal) && Right.TokenText.startswith("R\"")) - // Multiline raw string literals are special wrt. line breaks. The author - // has made a deliberate choice and might have aligned the contents of the - // string literal accordingly. Thus, we try keep existing line breaks. - return Right.IsMultiline && Right.NewlinesBefore > 0; - if ((Right.Previous->is(tok::l_brace) || - (Right.Previous->is(tok::less) && Right.Previous->Previous && - Right.Previous->Previous->is(tok::equal))) && - Right.NestingLevel == 1 && Style.Language == FormatStyle::LK_Proto) { - // Don't put enums or option definitions onto single lines in protocol - // buffers. - return true; - } - if (Right.is(TT_InlineASMBrace)) - return Right.HasUnescapedNewline; - if (isAllmanBrace(Left) || isAllmanBrace(Right)) - return (Line.startsWith(tok::kw_enum) && Style.BraceWrapping.AfterEnum) || - (Line.startsWith(tok::kw_typedef, tok::kw_enum) && - Style.BraceWrapping.AfterEnum) || - (Line.startsWith(tok::kw_class) && Style.BraceWrapping.AfterClass) || - (Line.startsWith(tok::kw_struct) && Style.BraceWrapping.AfterStruct); - if (Left.is(TT_ObjCBlockLBrace) && !Style.AllowShortBlocksOnASingleLine) - return true; - - if ((Style.Language == FormatStyle::LK_Java || - Style.Language == FormatStyle::LK_JavaScript) && - Left.is(TT_LeadingJavaAnnotation) && - Right.isNot(TT_LeadingJavaAnnotation) && Right.isNot(tok::l_paren) && - (Line.Last->is(tok::l_brace) || Style.BreakAfterJavaFieldAnnotations)) - return true; - - if (Right.is(TT_ProtoExtensionLSquare)) - return true; - - // In text proto instances if a submessage contains at least 2 entries and at - // least one of them is a submessage, like A { ... B { ... } ... }, - // put all of the entries of A on separate lines by forcing the selector of - // the submessage B to be put on a newline. - // - // Example: these can stay on one line: - // a { scalar_1: 1 scalar_2: 2 } - // a { b { key: value } } - // - // and these entries need to be on a new line even if putting them all in one - // line is under the column limit: - // a { - // scalar: 1 - // b { key: value } - // } - // - // We enforce this by breaking before a submessage field that has previous - // siblings, *and* breaking before a field that follows a submessage field. - // - // Be careful to exclude the case [proto.ext] { ... } since the `]` is - // the TT_SelectorName there, but we don't want to break inside the brackets. - // - // Another edge case is @submessage { key: value }, which is a common - // substitution placeholder. In this case we want to keep `@` and `submessage` - // together. - // - // We ensure elsewhere that extensions are always on their own line. - if ((Style.Language == FormatStyle::LK_Proto || - Style.Language == FormatStyle::LK_TextProto) && - Right.is(TT_SelectorName) && !Right.is(tok::r_square) && Right.Next) { - // Keep `@submessage` together in: - // @submessage { key: value } - if (Right.Previous && Right.Previous->is(tok::at)) - return false; - // Look for the scope opener after selector in cases like: - // selector { ... - // selector: { ... - // selector: @base { ... - FormatToken *LBrace = Right.Next; - if (LBrace && LBrace->is(tok::colon)) { - LBrace = LBrace->Next; - if (LBrace && LBrace->is(tok::at)) { - LBrace = LBrace->Next; - if (LBrace) - LBrace = LBrace->Next; - } - } - if (LBrace && - // The scope opener is one of {, [, <: - // selector { ... } - // selector [ ... ] - // selector < ... > - // - // In case of selector { ... }, the l_brace is TT_DictLiteral. - // In case of an empty selector {}, the l_brace is not TT_DictLiteral, - // so we check for immediately following r_brace. - ((LBrace->is(tok::l_brace) && - (LBrace->is(TT_DictLiteral) || - (LBrace->Next && LBrace->Next->is(tok::r_brace)))) || - LBrace->is(TT_ArrayInitializerLSquare) || LBrace->is(tok::less))) { - // If Left.ParameterCount is 0, then this submessage entry is not the - // first in its parent submessage, and we want to break before this entry. - // If Left.ParameterCount is greater than 0, then its parent submessage - // might contain 1 or more entries and we want to break before this entry - // if it contains at least 2 entries. We deal with this case later by - // detecting and breaking before the next entry in the parent submessage. - if (Left.ParameterCount == 0) - return true; - // However, if this submessage is the first entry in its parent - // submessage, Left.ParameterCount might be 1 in some cases. - // We deal with this case later by detecting an entry - // following a closing paren of this submessage. - } - - // If this is an entry immediately following a submessage, it will be - // preceded by a closing paren of that submessage, like in: - // left---. .---right - // v v - // sub: { ... } key: value - // If there was a comment between `}` an `key` above, then `key` would be - // put on a new line anyways. - if (Left.isOneOf(tok::r_brace, tok::greater, tok::r_square)) - return true; - } - - // Deal with lambda arguments in C++ - we want consistent line breaks whether - // they happen to be at arg0, arg1 or argN. The selection is a bit nuanced - // as aggressive line breaks are placed when the lambda is not the last arg. - if ((Style.Language == FormatStyle::LK_Cpp || - Style.Language == FormatStyle::LK_ObjC) && - Left.is(tok::l_paren) && Left.BlockParameterCount > 0 && - !Right.isOneOf(tok::l_paren, TT_LambdaLSquare)) { - // Multiple lambdas in the same function call force line breaks. - if (Left.BlockParameterCount > 1) - return true; - - // A lambda followed by another arg forces a line break. - if (!Left.Role) - return false; - auto Comma = Left.Role->lastComma(); - if (!Comma) - return false; - auto Next = Comma->getNextNonComment(); - if (!Next) - return false; - if (!Next->isOneOf(TT_LambdaLSquare, tok::l_brace, tok::caret)) - return true; - } - - return false; -} - -bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, - const FormatToken &Right) { - const FormatToken &Left = *Right.Previous; - - // Language-specific stuff. - if (Style.Language == FormatStyle::LK_Java) { - if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends, - Keywords.kw_implements)) - return false; - if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends, - Keywords.kw_implements)) - return true; - } else if (Style.Language == FormatStyle::LK_JavaScript) { - const FormatToken *NonComment = Right.getPreviousNonComment(); - if (NonComment && - NonComment->isOneOf( - tok::kw_return, Keywords.kw_yield, tok::kw_continue, tok::kw_break, - tok::kw_throw, Keywords.kw_interface, Keywords.kw_type, - tok::kw_static, tok::kw_public, tok::kw_private, tok::kw_protected, - Keywords.kw_readonly, Keywords.kw_abstract, Keywords.kw_get, - Keywords.kw_set, Keywords.kw_async, Keywords.kw_await)) - return false; // Otherwise automatic semicolon insertion would trigger. - if (Right.NestingLevel == 0 && - (Left.Tok.getIdentifierInfo() || - Left.isOneOf(tok::r_square, tok::r_paren)) && - Right.isOneOf(tok::l_square, tok::l_paren)) - return false; // Otherwise automatic semicolon insertion would trigger. - if (Left.is(TT_JsFatArrow) && Right.is(tok::l_brace)) - return false; - if (Left.is(TT_JsTypeColon)) - return true; - // Don't wrap between ":" and "!" of a strict prop init ("field!: type;"). - if (Left.is(tok::exclaim) && Right.is(tok::colon)) - return false; - // Look for is type annotations like: - // function f(): a is B { ... } - // Do not break before is in these cases. - if (Right.is(Keywords.kw_is)) { - const FormatToken* Next = Right.getNextNonComment(); - // If `is` is followed by a colon, it's likely that it's a dict key, so - // ignore it for this check. - // For example this is common in Polymer: - // Polymer({ - // is: 'name', - // ... - // }); - if (!Next || !Next->is(tok::colon)) - return false; - } - if (Left.is(Keywords.kw_in)) - return Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None; - if (Right.is(Keywords.kw_in)) - return Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None; - if (Right.is(Keywords.kw_as)) - return false; // must not break before as in 'x as type' casts - if (Right.isOneOf(Keywords.kw_extends, Keywords.kw_infer)) { - // extends and infer can appear as keywords in conditional types: - // https://www.typescriptlang.org/docs/handbook/release-notes/typescript-2-8.html#conditional-types - // do not break before them, as the expressions are subject to ASI. - return false; - } - if (Left.is(Keywords.kw_as)) - return true; - if (Left.is(TT_JsNonNullAssertion)) - return true; - if (Left.is(Keywords.kw_declare) && - Right.isOneOf(Keywords.kw_module, tok::kw_namespace, - Keywords.kw_function, tok::kw_class, tok::kw_enum, - Keywords.kw_interface, Keywords.kw_type, Keywords.kw_var, - Keywords.kw_let, tok::kw_const)) - // See grammar for 'declare' statements at: - // https://github.com/Microsoft/TypeScript/blob/master/doc/spec.md#A.10 - return false; - if (Left.isOneOf(Keywords.kw_module, tok::kw_namespace) && - Right.isOneOf(tok::identifier, tok::string_literal)) - return false; // must not break in "module foo { ...}" - if (Right.is(TT_TemplateString) && Right.closesScope()) - return false; - if (Left.is(TT_TemplateString) && Left.opensScope()) - return true; - } - - if (Left.is(tok::at)) - return false; - if (Left.Tok.getObjCKeywordID() == tok::objc_interface) - return false; - if (Left.isOneOf(TT_JavaAnnotation, TT_LeadingJavaAnnotation)) - return !Right.is(tok::l_paren); - if (Right.is(TT_PointerOrReference)) - return Line.IsMultiVariableDeclStmt || - (Style.PointerAlignment == FormatStyle::PAS_Right && - (!Right.Next || Right.Next->isNot(TT_FunctionDeclarationName))); - if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) || - Right.is(tok::kw_operator)) - return true; - if (Left.is(TT_PointerOrReference)) - return false; - if (Right.isTrailingComment()) - // We rely on MustBreakBefore being set correctly here as we should not - // change the "binding" behavior of a comment. - // The first comment in a braced lists is always interpreted as belonging to - // the first list element. Otherwise, it should be placed outside of the - // list. - return Left.BlockKind == BK_BracedInit || - (Left.is(TT_CtorInitializerColon) && - Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon); - if (Left.is(tok::question) && Right.is(tok::colon)) - return false; - if (Right.is(TT_ConditionalExpr) || Right.is(tok::question)) - return Style.BreakBeforeTernaryOperators; - if (Left.is(TT_ConditionalExpr) || Left.is(tok::question)) - return !Style.BreakBeforeTernaryOperators; - if (Left.is(TT_InheritanceColon)) - return Style.BreakInheritanceList == FormatStyle::BILS_AfterColon; - if (Right.is(TT_InheritanceColon)) - return Style.BreakInheritanceList != FormatStyle::BILS_AfterColon; - if (Right.is(TT_ObjCMethodExpr) && !Right.is(tok::r_square) && - Left.isNot(TT_SelectorName)) - return true; - - if (Right.is(tok::colon) && - !Right.isOneOf(TT_CtorInitializerColon, TT_InlineASMColon)) - return false; - if (Left.is(tok::colon) && Left.isOneOf(TT_DictLiteral, TT_ObjCMethodExpr)) { - if (Style.Language == FormatStyle::LK_Proto || - Style.Language == FormatStyle::LK_TextProto) { - if (!Style.AlwaysBreakBeforeMultilineStrings && Right.isStringLiteral()) - return false; - // Prevent cases like: - // - // submessage: - // { key: valueeeeeeeeeeee } - // - // when the snippet does not fit into one line. - // Prefer: - // - // submessage: { - // key: valueeeeeeeeeeee - // } - // - // instead, even if it is longer by one line. - // - // Note that this allows allows the "{" to go over the column limit - // when the column limit is just between ":" and "{", but that does - // not happen too often and alternative formattings in this case are - // not much better. - // - // The code covers the cases: - // - // submessage: { ... } - // submessage: < ... > - // repeated: [ ... ] - if (((Right.is(tok::l_brace) || Right.is(tok::less)) && - Right.is(TT_DictLiteral)) || - Right.is(TT_ArrayInitializerLSquare)) - return false; - } - return true; - } - if (Right.is(tok::r_square) && Right.MatchingParen && - Right.MatchingParen->is(TT_ProtoExtensionLSquare)) - return false; - if (Right.is(TT_SelectorName) || (Right.is(tok::identifier) && Right.Next && - Right.Next->is(TT_ObjCMethodExpr))) - return Left.isNot(tok::period); // FIXME: Properly parse ObjC calls. - if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty) - return true; - if (Left.ClosesTemplateDeclaration || Left.is(TT_FunctionAnnotationRParen)) - return true; - if (Right.isOneOf(TT_RangeBasedForLoopColon, TT_OverloadedOperatorLParen, - TT_OverloadedOperator)) - return false; - if (Left.is(TT_RangeBasedForLoopColon)) - return true; - if (Right.is(TT_RangeBasedForLoopColon)) - return false; - if (Left.is(TT_TemplateCloser) && Right.is(TT_TemplateOpener)) - return true; - if (Left.isOneOf(TT_TemplateCloser, TT_UnaryOperator) || - Left.is(tok::kw_operator)) - return false; - if (Left.is(tok::equal) && !Right.isOneOf(tok::kw_default, tok::kw_delete) && - Line.Type == LT_VirtualFunctionDecl && Left.NestingLevel == 0) - return false; - if (Left.is(tok::equal) && Right.is(tok::l_brace) && - !Style.Cpp11BracedListStyle) - return false; - if (Left.is(tok::l_paren) && Left.is(TT_AttributeParen)) - return false; - if (Left.is(tok::l_paren) && Left.Previous && - (Left.Previous->isOneOf(TT_BinaryOperator, TT_CastRParen))) - return false; - if (Right.is(TT_ImplicitStringLiteral)) - return false; - - if (Right.is(tok::r_paren) || Right.is(TT_TemplateCloser)) - return false; - if (Right.is(tok::r_square) && Right.MatchingParen && - Right.MatchingParen->is(TT_LambdaLSquare)) - return false; - - // We only break before r_brace if there was a corresponding break before - // the l_brace, which is tracked by BreakBeforeClosingBrace. - if (Right.is(tok::r_brace)) - return Right.MatchingParen && Right.MatchingParen->BlockKind == BK_Block; - - // Allow breaking after a trailing annotation, e.g. after a method - // declaration. - if (Left.is(TT_TrailingAnnotation)) - return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren, - tok::less, tok::coloncolon); - - if (Right.is(tok::kw___attribute) || - (Right.is(tok::l_square) && Right.is(TT_AttributeSquare))) - return true; - - if (Left.is(tok::identifier) && Right.is(tok::string_literal)) - return true; - - if (Right.is(tok::identifier) && Right.Next && Right.Next->is(TT_DictLiteral)) - return true; - - if (Left.is(TT_CtorInitializerColon)) - return Style.BreakConstructorInitializers == FormatStyle::BCIS_AfterColon; - if (Right.is(TT_CtorInitializerColon)) - return Style.BreakConstructorInitializers != FormatStyle::BCIS_AfterColon; - if (Left.is(TT_CtorInitializerComma) && - Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) - return false; - if (Right.is(TT_CtorInitializerComma) && - Style.BreakConstructorInitializers == FormatStyle::BCIS_BeforeComma) - return true; - if (Left.is(TT_InheritanceComma) && - Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma) - return false; - if (Right.is(TT_InheritanceComma) && - Style.BreakInheritanceList == FormatStyle::BILS_BeforeComma) - return true; - if ((Left.is(tok::greater) && Right.is(tok::greater)) || - (Left.is(tok::less) && Right.is(tok::less))) - return false; - if (Right.is(TT_BinaryOperator) && - Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None && - (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_All || - Right.getPrecedence() != prec::Assignment)) - return true; - if (Left.is(TT_ArrayInitializerLSquare)) - return true; - if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const)) - return true; - if ((Left.isBinaryOperator() || Left.is(TT_BinaryOperator)) && - !Left.isOneOf(tok::arrowstar, tok::lessless) && - Style.BreakBeforeBinaryOperators != FormatStyle::BOS_All && - (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None || - Left.getPrecedence() == prec::Assignment)) - return true; - if ((Left.is(TT_AttributeSquare) && Right.is(tok::l_square)) || - (Left.is(tok::r_square) && Right.is(TT_AttributeSquare))) - return false; - return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace, - tok::kw_class, tok::kw_struct, tok::comment) || - Right.isMemberAccess() || - Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow, tok::lessless, - tok::colon, tok::l_square, tok::at) || - (Left.is(tok::r_paren) && - Right.isOneOf(tok::identifier, tok::kw_const)) || - (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) || - (Left.is(TT_TemplateOpener) && !Right.is(TT_TemplateCloser)); -} - -void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) { - llvm::errs() << "AnnotatedTokens(L=" << Line.Level << "):\n"; - const FormatToken *Tok = Line.First; - while (Tok) { - llvm::errs() << " M=" << Tok->MustBreakBefore - << " C=" << Tok->CanBreakBefore - << " T=" << getTokenTypeName(Tok->Type) - << " S=" << Tok->SpacesRequiredBefore - << " B=" << Tok->BlockParameterCount - << " BK=" << Tok->BlockKind << " P=" << Tok->SplitPenalty - << " Name=" << Tok->Tok.getName() << " L=" << Tok->TotalLength - << " PPK=" << Tok->PackingKind << " FakeLParens="; - for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i) - llvm::errs() << Tok->FakeLParens[i] << "/"; - llvm::errs() << " FakeRParens=" << Tok->FakeRParens; - llvm::errs() << " II=" << Tok->Tok.getIdentifierInfo(); - llvm::errs() << " Text='" << Tok->TokenText << "'\n"; - if (!Tok->Next) - assert(Tok == Line.Last); - Tok = Tok->Next; - } - llvm::errs() << "----\n"; -} - -} // namespace format -} // namespace clang diff --git a/gnu/llvm/tools/clang/lib/Format/TokenAnnotator.h b/gnu/llvm/tools/clang/lib/Format/TokenAnnotator.h deleted file mode 100644 index e2f2c469d26..00000000000 --- a/gnu/llvm/tools/clang/lib/Format/TokenAnnotator.h +++ /dev/null @@ -1,191 +0,0 @@ -//===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file implements a token annotator, i.e. creates -/// \c AnnotatedTokens out of \c FormatTokens with required extra information. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H -#define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H - -#include "UnwrappedLineParser.h" -#include "clang/Format/Format.h" - -namespace clang { -class SourceManager; - -namespace format { - -enum LineType { - LT_Invalid, - LT_ImportStatement, - LT_ObjCDecl, // An @interface, @implementation, or @protocol line. - LT_ObjCMethodDecl, - LT_ObjCProperty, // An @property line. - LT_Other, - LT_PreprocessorDirective, - LT_VirtualFunctionDecl -}; - -class AnnotatedLine { -public: - AnnotatedLine(const UnwrappedLine &Line) - : First(Line.Tokens.front().Tok), Level(Line.Level), - MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex), - MatchingClosingBlockLineIndex(Line.MatchingClosingBlockLineIndex), - InPPDirective(Line.InPPDirective), - MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false), - IsMultiVariableDeclStmt(false), Affected(false), - LeadingEmptyLinesAffected(false), ChildrenAffected(false), - FirstStartColumn(Line.FirstStartColumn) { - assert(!Line.Tokens.empty()); - - // Calculate Next and Previous for all tokens. Note that we must overwrite - // Next and Previous for every token, as previous formatting runs might have - // left them in a different state. - First->Previous = nullptr; - FormatToken *Current = First; - for (std::list<UnwrappedLineNode>::const_iterator I = ++Line.Tokens.begin(), - E = Line.Tokens.end(); - I != E; ++I) { - const UnwrappedLineNode &Node = *I; - Current->Next = I->Tok; - I->Tok->Previous = Current; - Current = Current->Next; - Current->Children.clear(); - for (const auto &Child : Node.Children) { - Children.push_back(new AnnotatedLine(Child)); - Current->Children.push_back(Children.back()); - } - } - Last = Current; - Last->Next = nullptr; - } - - ~AnnotatedLine() { - for (unsigned i = 0, e = Children.size(); i != e; ++i) { - delete Children[i]; - } - FormatToken *Current = First; - while (Current) { - Current->Children.clear(); - Current->Role.reset(); - Current = Current->Next; - } - } - - /// \c true if this line starts with the given tokens in order, ignoring - /// comments. - template <typename... Ts> bool startsWith(Ts... Tokens) const { - return First && First->startsSequence(Tokens...); - } - - /// \c true if this line ends with the given tokens in reversed order, - /// ignoring comments. - /// For example, given tokens [T1, T2, T3, ...], the function returns true if - /// this line is like "... T3 T2 T1". - template <typename... Ts> bool endsWith(Ts... Tokens) const { - return Last && Last->endsSequence(Tokens...); - } - - /// \c true if this line looks like a function definition instead of a - /// function declaration. Asserts MightBeFunctionDecl. - bool mightBeFunctionDefinition() const { - assert(MightBeFunctionDecl); - // FIXME: Line.Last points to other characters than tok::semi - // and tok::lbrace. - return !Last->isOneOf(tok::semi, tok::comment); - } - - /// \c true if this line starts a namespace definition. - bool startsWithNamespace() const { - return startsWith(tok::kw_namespace) || - startsWith(tok::kw_inline, tok::kw_namespace) || - startsWith(tok::kw_export, tok::kw_namespace); - } - - FormatToken *First; - FormatToken *Last; - - SmallVector<AnnotatedLine *, 0> Children; - - LineType Type; - unsigned Level; - size_t MatchingOpeningBlockLineIndex; - size_t MatchingClosingBlockLineIndex; - bool InPPDirective; - bool MustBeDeclaration; - bool MightBeFunctionDecl; - bool IsMultiVariableDeclStmt; - - /// \c True if this line should be formatted, i.e. intersects directly or - /// indirectly with one of the input ranges. - bool Affected; - - /// \c True if the leading empty lines of this line intersect with one of the - /// input ranges. - bool LeadingEmptyLinesAffected; - - /// \c True if one of this line's children intersects with an input range. - bool ChildrenAffected; - - unsigned FirstStartColumn; - -private: - // Disallow copying. - AnnotatedLine(const AnnotatedLine &) = delete; - void operator=(const AnnotatedLine &) = delete; -}; - -/// Determines extra information about the tokens comprising an -/// \c UnwrappedLine. -class TokenAnnotator { -public: - TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords) - : Style(Style), Keywords(Keywords) {} - - /// Adapts the indent levels of comment lines to the indent of the - /// subsequent line. - // FIXME: Can/should this be done in the UnwrappedLineParser? - void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines); - - void annotate(AnnotatedLine &Line); - void calculateFormattingInformation(AnnotatedLine &Line); - -private: - /// Calculate the penalty for splitting before \c Tok. - unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok, - bool InFunctionDecl); - - bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left, - const FormatToken &Right); - - bool spaceRequiredBefore(const AnnotatedLine &Line, const FormatToken &Right); - - bool mustBreakBefore(const AnnotatedLine &Line, const FormatToken &Right); - - bool canBreakBefore(const AnnotatedLine &Line, const FormatToken &Right); - - bool mustBreakForReturnType(const AnnotatedLine &Line) const; - - void printDebugInfo(const AnnotatedLine &Line); - - void calculateUnbreakableTailLengths(AnnotatedLine &Line); - - const FormatStyle &Style; - - const AdditionalKeywords &Keywords; -}; - -} // end namespace format -} // end namespace clang - -#endif diff --git a/gnu/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.cpp b/gnu/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.cpp deleted file mode 100644 index 6b6a9aff461..00000000000 --- a/gnu/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.cpp +++ /dev/null @@ -1,1212 +0,0 @@ -//===--- UnwrappedLineFormatter.cpp - Format C++ code ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "NamespaceEndCommentsFixer.h" -#include "UnwrappedLineFormatter.h" -#include "WhitespaceManager.h" -#include "llvm/Support/Debug.h" -#include <queue> - -#define DEBUG_TYPE "format-formatter" - -namespace clang { -namespace format { - -namespace { - -bool startsExternCBlock(const AnnotatedLine &Line) { - const FormatToken *Next = Line.First->getNextNonComment(); - const FormatToken *NextNext = Next ? Next->getNextNonComment() : nullptr; - return Line.startsWith(tok::kw_extern) && Next && Next->isStringLiteral() && - NextNext && NextNext->is(tok::l_brace); -} - -/// Tracks the indent level of \c AnnotatedLines across levels. -/// -/// \c nextLine must be called for each \c AnnotatedLine, after which \c -/// getIndent() will return the indent for the last line \c nextLine was called -/// with. -/// If the line is not formatted (and thus the indent does not change), calling -/// \c adjustToUnmodifiedLine after the call to \c nextLine will cause -/// subsequent lines on the same level to be indented at the same level as the -/// given line. -class LevelIndentTracker { -public: - LevelIndentTracker(const FormatStyle &Style, - const AdditionalKeywords &Keywords, unsigned StartLevel, - int AdditionalIndent) - : Style(Style), Keywords(Keywords), AdditionalIndent(AdditionalIndent) { - for (unsigned i = 0; i != StartLevel; ++i) - IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent); - } - - /// Returns the indent for the current line. - unsigned getIndent() const { return Indent; } - - /// Update the indent state given that \p Line is going to be formatted - /// next. - void nextLine(const AnnotatedLine &Line) { - Offset = getIndentOffset(*Line.First); - // Update the indent level cache size so that we can rely on it - // having the right size in adjustToUnmodifiedline. - while (IndentForLevel.size() <= Line.Level) - IndentForLevel.push_back(-1); - if (Line.InPPDirective) { - Indent = Line.Level * Style.IndentWidth + AdditionalIndent; - } else { - IndentForLevel.resize(Line.Level + 1); - Indent = getIndent(IndentForLevel, Line.Level); - } - if (static_cast<int>(Indent) + Offset >= 0) - Indent += Offset; - } - - /// Update the indent state given that \p Line indent should be - /// skipped. - void skipLine(const AnnotatedLine &Line) { - while (IndentForLevel.size() <= Line.Level) - IndentForLevel.push_back(Indent); - } - - /// Update the level indent to adapt to the given \p Line. - /// - /// When a line is not formatted, we move the subsequent lines on the same - /// level to the same indent. - /// Note that \c nextLine must have been called before this method. - void adjustToUnmodifiedLine(const AnnotatedLine &Line) { - unsigned LevelIndent = Line.First->OriginalColumn; - if (static_cast<int>(LevelIndent) - Offset >= 0) - LevelIndent -= Offset; - if ((!Line.First->is(tok::comment) || IndentForLevel[Line.Level] == -1) && - !Line.InPPDirective) - IndentForLevel[Line.Level] = LevelIndent; - } - -private: - /// Get the offset of the line relatively to the level. - /// - /// For example, 'public:' labels in classes are offset by 1 or 2 - /// characters to the left from their level. - int getIndentOffset(const FormatToken &RootToken) { - if (Style.Language == FormatStyle::LK_Java || - Style.Language == FormatStyle::LK_JavaScript) - return 0; - if (RootToken.isAccessSpecifier(false) || - RootToken.isObjCAccessSpecifier() || - (RootToken.isOneOf(Keywords.kw_signals, Keywords.kw_qsignals) && - RootToken.Next && RootToken.Next->is(tok::colon))) - return Style.AccessModifierOffset; - return 0; - } - - /// Get the indent of \p Level from \p IndentForLevel. - /// - /// \p IndentForLevel must contain the indent for the level \c l - /// at \p IndentForLevel[l], or a value < 0 if the indent for - /// that level is unknown. - unsigned getIndent(ArrayRef<int> IndentForLevel, unsigned Level) { - if (IndentForLevel[Level] != -1) - return IndentForLevel[Level]; - if (Level == 0) - return 0; - return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth; - } - - const FormatStyle &Style; - const AdditionalKeywords &Keywords; - const unsigned AdditionalIndent; - - /// The indent in characters for each level. - std::vector<int> IndentForLevel; - - /// Offset of the current line relative to the indent level. - /// - /// For example, the 'public' keywords is often indented with a negative - /// offset. - int Offset = 0; - - /// The current line's indent. - unsigned Indent = 0; -}; - -bool isNamespaceDeclaration(const AnnotatedLine *Line) { - const FormatToken *NamespaceTok = Line->First; - return NamespaceTok && NamespaceTok->getNamespaceToken(); -} - -bool isEndOfNamespace(const AnnotatedLine *Line, - const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { - if (!Line->startsWith(tok::r_brace)) - return false; - size_t StartLineIndex = Line->MatchingOpeningBlockLineIndex; - if (StartLineIndex == UnwrappedLine::kInvalidIndex) - return false; - assert(StartLineIndex < AnnotatedLines.size()); - return isNamespaceDeclaration(AnnotatedLines[StartLineIndex]); -} - -class LineJoiner { -public: - LineJoiner(const FormatStyle &Style, const AdditionalKeywords &Keywords, - const SmallVectorImpl<AnnotatedLine *> &Lines) - : Style(Style), Keywords(Keywords), End(Lines.end()), Next(Lines.begin()), - AnnotatedLines(Lines) {} - - /// Returns the next line, merging multiple lines into one if possible. - const AnnotatedLine *getNextMergedLine(bool DryRun, - LevelIndentTracker &IndentTracker) { - if (Next == End) - return nullptr; - const AnnotatedLine *Current = *Next; - IndentTracker.nextLine(*Current); - unsigned MergedLines = tryFitMultipleLinesInOne(IndentTracker, Next, End); - if (MergedLines > 0 && Style.ColumnLimit == 0) - // Disallow line merging if there is a break at the start of one of the - // input lines. - for (unsigned i = 0; i < MergedLines; ++i) - if (Next[i + 1]->First->NewlinesBefore > 0) - MergedLines = 0; - if (!DryRun) - for (unsigned i = 0; i < MergedLines; ++i) - join(*Next[0], *Next[i + 1]); - Next = Next + MergedLines + 1; - return Current; - } - -private: - /// Calculates how many lines can be merged into 1 starting at \p I. - unsigned - tryFitMultipleLinesInOne(LevelIndentTracker &IndentTracker, - SmallVectorImpl<AnnotatedLine *>::const_iterator I, - SmallVectorImpl<AnnotatedLine *>::const_iterator E) { - const unsigned Indent = IndentTracker.getIndent(); - - // Can't join the last line with anything. - if (I + 1 == E) - return 0; - // We can never merge stuff if there are trailing line comments. - const AnnotatedLine *TheLine = *I; - if (TheLine->Last->is(TT_LineComment)) - return 0; - if (I[1]->Type == LT_Invalid || I[1]->First->MustBreakBefore) - return 0; - if (TheLine->InPPDirective && - (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline)) - return 0; - - if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit) - return 0; - - unsigned Limit = - Style.ColumnLimit == 0 ? UINT_MAX : Style.ColumnLimit - Indent; - // If we already exceed the column limit, we set 'Limit' to 0. The different - // tryMerge..() functions can then decide whether to still do merging. - Limit = TheLine->Last->TotalLength > Limit - ? 0 - : Limit - TheLine->Last->TotalLength; - - if (TheLine->Last->is(TT_FunctionLBrace) && - TheLine->First == TheLine->Last && - !Style.BraceWrapping.SplitEmptyFunction && - I[1]->First->is(tok::r_brace)) - return tryMergeSimpleBlock(I, E, Limit); - - // Handle empty record blocks where the brace has already been wrapped - if (TheLine->Last->is(tok::l_brace) && TheLine->First == TheLine->Last && - I != AnnotatedLines.begin()) { - bool EmptyBlock = I[1]->First->is(tok::r_brace); - - const FormatToken *Tok = I[-1]->First; - if (Tok && Tok->is(tok::comment)) - Tok = Tok->getNextNonComment(); - - if (Tok && Tok->getNamespaceToken()) - return !Style.BraceWrapping.SplitEmptyNamespace && EmptyBlock - ? tryMergeSimpleBlock(I, E, Limit) - : 0; - - if (Tok && Tok->is(tok::kw_typedef)) - Tok = Tok->getNextNonComment(); - if (Tok && Tok->isOneOf(tok::kw_class, tok::kw_struct, tok::kw_union, - tok::kw_extern, Keywords.kw_interface)) - return !Style.BraceWrapping.SplitEmptyRecord && EmptyBlock - ? tryMergeSimpleBlock(I, E, Limit) - : 0; - } - - // FIXME: TheLine->Level != 0 might or might not be the right check to do. - // If necessary, change to something smarter. - bool MergeShortFunctions = - Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_All || - (Style.AllowShortFunctionsOnASingleLine >= FormatStyle::SFS_Empty && - I[1]->First->is(tok::r_brace)) || - (Style.AllowShortFunctionsOnASingleLine & FormatStyle::SFS_InlineOnly && - TheLine->Level != 0); - - if (Style.CompactNamespaces) { - if (isNamespaceDeclaration(TheLine)) { - int i = 0; - unsigned closingLine = TheLine->MatchingClosingBlockLineIndex - 1; - for (; I + 1 + i != E && isNamespaceDeclaration(I[i + 1]) && - closingLine == I[i + 1]->MatchingClosingBlockLineIndex && - I[i + 1]->Last->TotalLength < Limit; - i++, closingLine--) { - // No extra indent for compacted namespaces - IndentTracker.skipLine(*I[i + 1]); - - Limit -= I[i + 1]->Last->TotalLength; - } - return i; - } - - if (isEndOfNamespace(TheLine, AnnotatedLines)) { - int i = 0; - unsigned openingLine = TheLine->MatchingOpeningBlockLineIndex - 1; - for (; I + 1 + i != E && isEndOfNamespace(I[i + 1], AnnotatedLines) && - openingLine == I[i + 1]->MatchingOpeningBlockLineIndex; - i++, openingLine--) { - // No space between consecutive braces - I[i + 1]->First->SpacesRequiredBefore = !I[i]->Last->is(tok::r_brace); - - // Indent like the outer-most namespace - IndentTracker.nextLine(*I[i + 1]); - } - return i; - } - } - - // Try to merge a function block with left brace unwrapped - if (TheLine->Last->is(TT_FunctionLBrace) && - TheLine->First != TheLine->Last) { - return MergeShortFunctions ? tryMergeSimpleBlock(I, E, Limit) : 0; - } - // Try to merge a control statement block with left brace unwrapped - if (TheLine->Last->is(tok::l_brace) && TheLine->First != TheLine->Last && - TheLine->First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_for)) { - return Style.AllowShortBlocksOnASingleLine - ? tryMergeSimpleBlock(I, E, Limit) - : 0; - } - // Try to merge a control statement block with left brace wrapped - if (I[1]->First->is(tok::l_brace) && - TheLine->First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_for)) { - return Style.BraceWrapping.AfterControlStatement - ? tryMergeSimpleBlock(I, E, Limit) - : 0; - } - // Try to merge either empty or one-line block if is precedeed by control - // statement token - if (TheLine->First->is(tok::l_brace) && TheLine->First == TheLine->Last && - I != AnnotatedLines.begin() && - I[-1]->First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_for)) { - unsigned MergedLines = 0; - if (Style.AllowShortBlocksOnASingleLine) { - MergedLines = tryMergeSimpleBlock(I - 1, E, Limit); - // If we managed to merge the block, discard the first merged line - // since we are merging starting from I. - if (MergedLines > 0) - --MergedLines; - } - return MergedLines; - } - // Don't merge block with left brace wrapped after ObjC special blocks - if (TheLine->First->is(tok::l_brace) && I != AnnotatedLines.begin() && - I[-1]->First->is(tok::at) && I[-1]->First->Next) { - tok::ObjCKeywordKind kwId = I[-1]->First->Next->Tok.getObjCKeywordID(); - if (kwId == clang::tok::objc_autoreleasepool || - kwId == clang::tok::objc_synchronized) - return 0; - } - // Don't merge block with left brace wrapped after case labels - if (TheLine->First->is(tok::l_brace) && I != AnnotatedLines.begin() && - I[-1]->First->isOneOf(tok::kw_case, tok::kw_default)) - return 0; - // Try to merge a block with left brace wrapped that wasn't yet covered - if (TheLine->Last->is(tok::l_brace)) { - return !Style.BraceWrapping.AfterFunction || - (I[1]->First->is(tok::r_brace) && - !Style.BraceWrapping.SplitEmptyRecord) - ? tryMergeSimpleBlock(I, E, Limit) - : 0; - } - // Try to merge a function block with left brace wrapped - if (I[1]->First->is(TT_FunctionLBrace) && - Style.BraceWrapping.AfterFunction) { - if (I[1]->Last->is(TT_LineComment)) - return 0; - - // Check for Limit <= 2 to account for the " {". - if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(TheLine))) - return 0; - Limit -= 2; - - unsigned MergedLines = 0; - if (MergeShortFunctions || - (Style.AllowShortFunctionsOnASingleLine >= FormatStyle::SFS_Empty && - I[1]->First == I[1]->Last && I + 2 != E && - I[2]->First->is(tok::r_brace))) { - MergedLines = tryMergeSimpleBlock(I + 1, E, Limit); - // If we managed to merge the block, count the function header, which is - // on a separate line. - if (MergedLines > 0) - ++MergedLines; - } - return MergedLines; - } - if (TheLine->First->is(tok::kw_if)) { - return Style.AllowShortIfStatementsOnASingleLine - ? tryMergeSimpleControlStatement(I, E, Limit) - : 0; - } - if (TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) { - return Style.AllowShortLoopsOnASingleLine - ? tryMergeSimpleControlStatement(I, E, Limit) - : 0; - } - if (TheLine->First->isOneOf(tok::kw_case, tok::kw_default)) { - return Style.AllowShortCaseLabelsOnASingleLine - ? tryMergeShortCaseLabels(I, E, Limit) - : 0; - } - if (TheLine->InPPDirective && - (TheLine->First->HasUnescapedNewline || TheLine->First->IsFirst)) { - return tryMergeSimplePPDirective(I, E, Limit); - } - return 0; - } - - unsigned - tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I, - SmallVectorImpl<AnnotatedLine *>::const_iterator E, - unsigned Limit) { - if (Limit == 0) - return 0; - if (I + 2 != E && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline) - return 0; - if (1 + I[1]->Last->TotalLength > Limit) - return 0; - return 1; - } - - unsigned tryMergeSimpleControlStatement( - SmallVectorImpl<AnnotatedLine *>::const_iterator I, - SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) { - if (Limit == 0) - return 0; - if (Style.BraceWrapping.AfterControlStatement && - (I[1]->First->is(tok::l_brace) && !Style.AllowShortBlocksOnASingleLine)) - return 0; - if (I[1]->InPPDirective != (*I)->InPPDirective || - (I[1]->InPPDirective && I[1]->First->HasUnescapedNewline)) - return 0; - Limit = limitConsideringMacros(I + 1, E, Limit); - AnnotatedLine &Line = **I; - if (Line.Last->isNot(tok::r_paren)) - return 0; - if (1 + I[1]->Last->TotalLength > Limit) - return 0; - if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, tok::kw_while, - TT_LineComment)) - return 0; - // Only inline simple if's (no nested if or else). - if (I + 2 != E && Line.startsWith(tok::kw_if) && - I[2]->First->is(tok::kw_else)) - return 0; - return 1; - } - - unsigned - tryMergeShortCaseLabels(SmallVectorImpl<AnnotatedLine *>::const_iterator I, - SmallVectorImpl<AnnotatedLine *>::const_iterator E, - unsigned Limit) { - if (Limit == 0 || I + 1 == E || - I[1]->First->isOneOf(tok::kw_case, tok::kw_default)) - return 0; - if (I[0]->Last->is(tok::l_brace) || I[1]->First->is(tok::l_brace)) - return 0; - unsigned NumStmts = 0; - unsigned Length = 0; - bool EndsWithComment = false; - bool InPPDirective = I[0]->InPPDirective; - const unsigned Level = I[0]->Level; - for (; NumStmts < 3; ++NumStmts) { - if (I + 1 + NumStmts == E) - break; - const AnnotatedLine *Line = I[1 + NumStmts]; - if (Line->InPPDirective != InPPDirective) - break; - if (Line->First->isOneOf(tok::kw_case, tok::kw_default, tok::r_brace)) - break; - if (Line->First->isOneOf(tok::kw_if, tok::kw_for, tok::kw_switch, - tok::kw_while) || - EndsWithComment) - return 0; - if (Line->First->is(tok::comment)) { - if (Level != Line->Level) - return 0; - SmallVectorImpl<AnnotatedLine *>::const_iterator J = I + 2 + NumStmts; - for (; J != E; ++J) { - Line = *J; - if (Line->InPPDirective != InPPDirective) - break; - if (Line->First->isOneOf(tok::kw_case, tok::kw_default, tok::r_brace)) - break; - if (Line->First->isNot(tok::comment) || Level != Line->Level) - return 0; - } - break; - } - if (Line->Last->is(tok::comment)) - EndsWithComment = true; - Length += I[1 + NumStmts]->Last->TotalLength + 1; // 1 for the space. - } - if (NumStmts == 0 || NumStmts == 3 || Length > Limit) - return 0; - return NumStmts; - } - - unsigned - tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::const_iterator I, - SmallVectorImpl<AnnotatedLine *>::const_iterator E, - unsigned Limit) { - AnnotatedLine &Line = **I; - - // Don't merge ObjC @ keywords and methods. - // FIXME: If an option to allow short exception handling clauses on a single - // line is added, change this to not return for @try and friends. - if (Style.Language != FormatStyle::LK_Java && - Line.First->isOneOf(tok::at, tok::minus, tok::plus)) - return 0; - - // Check that the current line allows merging. This depends on whether we - // are in a control flow statements as well as several style flags. - if (Line.First->isOneOf(tok::kw_else, tok::kw_case) || - (Line.First->Next && Line.First->Next->is(tok::kw_else))) - return 0; - // default: in switch statement - if (Line.First->is(tok::kw_default)) { - const FormatToken *Tok = Line.First->getNextNonComment(); - if (Tok && Tok->is(tok::colon)) - return 0; - } - if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::kw_try, - tok::kw___try, tok::kw_catch, tok::kw___finally, - tok::kw_for, tok::r_brace, Keywords.kw___except)) { - if (!Style.AllowShortBlocksOnASingleLine) - return 0; - // Don't merge when we can't except the case when - // the control statement block is empty - if (!Style.AllowShortIfStatementsOnASingleLine && - Line.startsWith(tok::kw_if) && - !Style.BraceWrapping.AfterControlStatement && - !I[1]->First->is(tok::r_brace)) - return 0; - if (!Style.AllowShortIfStatementsOnASingleLine && - Line.startsWith(tok::kw_if) && - Style.BraceWrapping.AfterControlStatement && I + 2 != E && - !I[2]->First->is(tok::r_brace)) - return 0; - if (!Style.AllowShortLoopsOnASingleLine && - Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for) && - !Style.BraceWrapping.AfterControlStatement && - !I[1]->First->is(tok::r_brace)) - return 0; - if (!Style.AllowShortLoopsOnASingleLine && - Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for) && - Style.BraceWrapping.AfterControlStatement && I + 2 != E && - !I[2]->First->is(tok::r_brace)) - return 0; - // FIXME: Consider an option to allow short exception handling clauses on - // a single line. - // FIXME: This isn't covered by tests. - // FIXME: For catch, __except, __finally the first token on the line - // is '}', so this isn't correct here. - if (Line.First->isOneOf(tok::kw_try, tok::kw___try, tok::kw_catch, - Keywords.kw___except, tok::kw___finally)) - return 0; - } - - if (Line.Last->is(tok::l_brace)) { - FormatToken *Tok = I[1]->First; - if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore && - (Tok->getNextNonComment() == nullptr || - Tok->getNextNonComment()->is(tok::semi))) { - // We merge empty blocks even if the line exceeds the column limit. - Tok->SpacesRequiredBefore = 0; - Tok->CanBreakBefore = true; - return 1; - } else if (Limit != 0 && !Line.startsWithNamespace() && - !startsExternCBlock(Line)) { - // We don't merge short records. - FormatToken *RecordTok = Line.First; - // Skip record modifiers. - while (RecordTok->Next && - RecordTok->isOneOf(tok::kw_typedef, tok::kw_export, - Keywords.kw_declare, Keywords.kw_abstract, - tok::kw_default)) - RecordTok = RecordTok->Next; - if (RecordTok && - RecordTok->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct, - Keywords.kw_interface)) - return 0; - - // Check that we still have three lines and they fit into the limit. - if (I + 2 == E || I[2]->Type == LT_Invalid) - return 0; - Limit = limitConsideringMacros(I + 2, E, Limit); - - if (!nextTwoLinesFitInto(I, Limit)) - return 0; - - // Second, check that the next line does not contain any braces - if it - // does, readability declines when putting it into a single line. - if (I[1]->Last->is(TT_LineComment)) - return 0; - do { - if (Tok->is(tok::l_brace) && Tok->BlockKind != BK_BracedInit) - return 0; - Tok = Tok->Next; - } while (Tok); - - // Last, check that the third line starts with a closing brace. - Tok = I[2]->First; - if (Tok->isNot(tok::r_brace)) - return 0; - - // Don't merge "if (a) { .. } else {". - if (Tok->Next && Tok->Next->is(tok::kw_else)) - return 0; - - return 2; - } - } else if (I[1]->First->is(tok::l_brace)) { - if (I[1]->Last->is(TT_LineComment)) - return 0; - - // Check for Limit <= 2 to account for the " {". - if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(*I))) - return 0; - Limit -= 2; - unsigned MergedLines = 0; - if (Style.AllowShortBlocksOnASingleLine || - (I[1]->First == I[1]->Last && I + 2 != E && - I[2]->First->is(tok::r_brace))) { - MergedLines = tryMergeSimpleBlock(I + 1, E, Limit); - // If we managed to merge the block, count the statement header, which - // is on a separate line. - if (MergedLines > 0) - ++MergedLines; - } - return MergedLines; - } - return 0; - } - - /// Returns the modified column limit for \p I if it is inside a macro and - /// needs a trailing '\'. - unsigned - limitConsideringMacros(SmallVectorImpl<AnnotatedLine *>::const_iterator I, - SmallVectorImpl<AnnotatedLine *>::const_iterator E, - unsigned Limit) { - if (I[0]->InPPDirective && I + 1 != E && - !I[1]->First->HasUnescapedNewline && !I[1]->First->is(tok::eof)) { - return Limit < 2 ? 0 : Limit - 2; - } - return Limit; - } - - bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::const_iterator I, - unsigned Limit) { - if (I[1]->First->MustBreakBefore || I[2]->First->MustBreakBefore) - return false; - return 1 + I[1]->Last->TotalLength + 1 + I[2]->Last->TotalLength <= Limit; - } - - bool containsMustBreak(const AnnotatedLine *Line) { - for (const FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) { - if (Tok->MustBreakBefore) - return true; - } - return false; - } - - void join(AnnotatedLine &A, const AnnotatedLine &B) { - assert(!A.Last->Next); - assert(!B.First->Previous); - if (B.Affected) - A.Affected = true; - A.Last->Next = B.First; - B.First->Previous = A.Last; - B.First->CanBreakBefore = true; - unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore; - for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) { - Tok->TotalLength += LengthA; - A.Last = Tok; - } - } - - const FormatStyle &Style; - const AdditionalKeywords &Keywords; - const SmallVectorImpl<AnnotatedLine *>::const_iterator End; - - SmallVectorImpl<AnnotatedLine *>::const_iterator Next; - const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines; -}; - -static void markFinalized(FormatToken *Tok) { - for (; Tok; Tok = Tok->Next) { - Tok->Finalized = true; - for (AnnotatedLine *Child : Tok->Children) - markFinalized(Child->First); - } -} - -#ifndef NDEBUG -static void printLineState(const LineState &State) { - llvm::dbgs() << "State: "; - for (const ParenState &P : State.Stack) { - llvm::dbgs() << (P.Tok ? P.Tok->TokenText : "F") << "|" << P.Indent << "|" - << P.LastSpace << "|" << P.NestedBlockIndent << " "; - } - llvm::dbgs() << State.NextToken->TokenText << "\n"; -} -#endif - -/// Base class for classes that format one \c AnnotatedLine. -class LineFormatter { -public: - LineFormatter(ContinuationIndenter *Indenter, WhitespaceManager *Whitespaces, - const FormatStyle &Style, - UnwrappedLineFormatter *BlockFormatter) - : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style), - BlockFormatter(BlockFormatter) {} - virtual ~LineFormatter() {} - - /// Formats an \c AnnotatedLine and returns the penalty. - /// - /// If \p DryRun is \c false, directly applies the changes. - virtual unsigned formatLine(const AnnotatedLine &Line, - unsigned FirstIndent, - unsigned FirstStartColumn, - bool DryRun) = 0; - -protected: - /// If the \p State's next token is an r_brace closing a nested block, - /// format the nested block before it. - /// - /// Returns \c true if all children could be placed successfully and adapts - /// \p Penalty as well as \p State. If \p DryRun is false, also directly - /// creates changes using \c Whitespaces. - /// - /// The crucial idea here is that children always get formatted upon - /// encountering the closing brace right after the nested block. Now, if we - /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is - /// \c false), the entire block has to be kept on the same line (which is only - /// possible if it fits on the line, only contains a single statement, etc. - /// - /// If \p NewLine is true, we format the nested block on separate lines, i.e. - /// break after the "{", format all lines with correct indentation and the put - /// the closing "}" on yet another new line. - /// - /// This enables us to keep the simple structure of the - /// \c UnwrappedLineFormatter, where we only have two options for each token: - /// break or don't break. - bool formatChildren(LineState &State, bool NewLine, bool DryRun, - unsigned &Penalty) { - const FormatToken *LBrace = State.NextToken->getPreviousNonComment(); - FormatToken &Previous = *State.NextToken->Previous; - if (!LBrace || LBrace->isNot(tok::l_brace) || - LBrace->BlockKind != BK_Block || Previous.Children.size() == 0) - // The previous token does not open a block. Nothing to do. We don't - // assert so that we can simply call this function for all tokens. - return true; - - if (NewLine) { - int AdditionalIndent = State.Stack.back().Indent - - Previous.Children[0]->Level * Style.IndentWidth; - - Penalty += - BlockFormatter->format(Previous.Children, DryRun, AdditionalIndent, - /*FixBadIndentation=*/true); - return true; - } - - if (Previous.Children[0]->First->MustBreakBefore) - return false; - - // Cannot merge into one line if this line ends on a comment. - if (Previous.is(tok::comment)) - return false; - - // Cannot merge multiple statements into a single line. - if (Previous.Children.size() > 1) - return false; - - const AnnotatedLine *Child = Previous.Children[0]; - // We can't put the closing "}" on a line with a trailing comment. - if (Child->Last->isTrailingComment()) - return false; - - // If the child line exceeds the column limit, we wouldn't want to merge it. - // We add +2 for the trailing " }". - if (Style.ColumnLimit > 0 && - Child->Last->TotalLength + State.Column + 2 > Style.ColumnLimit) - return false; - - if (!DryRun) { - Whitespaces->replaceWhitespace( - *Child->First, /*Newlines=*/0, /*Spaces=*/1, - /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective); - } - Penalty += - formatLine(*Child, State.Column + 1, /*FirstStartColumn=*/0, DryRun); - - State.Column += 1 + Child->Last->TotalLength; - return true; - } - - ContinuationIndenter *Indenter; - -private: - WhitespaceManager *Whitespaces; - const FormatStyle &Style; - UnwrappedLineFormatter *BlockFormatter; -}; - -/// Formatter that keeps the existing line breaks. -class NoColumnLimitLineFormatter : public LineFormatter { -public: - NoColumnLimitLineFormatter(ContinuationIndenter *Indenter, - WhitespaceManager *Whitespaces, - const FormatStyle &Style, - UnwrappedLineFormatter *BlockFormatter) - : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {} - - /// Formats the line, simply keeping all of the input's line breaking - /// decisions. - unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, - unsigned FirstStartColumn, bool DryRun) override { - assert(!DryRun); - LineState State = Indenter->getInitialState(FirstIndent, FirstStartColumn, - &Line, /*DryRun=*/false); - while (State.NextToken) { - bool Newline = - Indenter->mustBreak(State) || - (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0); - unsigned Penalty = 0; - formatChildren(State, Newline, /*DryRun=*/false, Penalty); - Indenter->addTokenToState(State, Newline, /*DryRun=*/false); - } - return 0; - } -}; - -/// Formatter that puts all tokens into a single line without breaks. -class NoLineBreakFormatter : public LineFormatter { -public: - NoLineBreakFormatter(ContinuationIndenter *Indenter, - WhitespaceManager *Whitespaces, const FormatStyle &Style, - UnwrappedLineFormatter *BlockFormatter) - : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {} - - /// Puts all tokens into a single line. - unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, - unsigned FirstStartColumn, bool DryRun) override { - unsigned Penalty = 0; - LineState State = - Indenter->getInitialState(FirstIndent, FirstStartColumn, &Line, DryRun); - while (State.NextToken) { - formatChildren(State, /*Newline=*/false, DryRun, Penalty); - Indenter->addTokenToState( - State, /*Newline=*/State.NextToken->MustBreakBefore, DryRun); - } - return Penalty; - } -}; - -/// Finds the best way to break lines. -class OptimizingLineFormatter : public LineFormatter { -public: - OptimizingLineFormatter(ContinuationIndenter *Indenter, - WhitespaceManager *Whitespaces, - const FormatStyle &Style, - UnwrappedLineFormatter *BlockFormatter) - : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {} - - /// Formats the line by finding the best line breaks with line lengths - /// below the column limit. - unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent, - unsigned FirstStartColumn, bool DryRun) override { - LineState State = - Indenter->getInitialState(FirstIndent, FirstStartColumn, &Line, DryRun); - - // If the ObjC method declaration does not fit on a line, we should format - // it with one arg per line. - if (State.Line->Type == LT_ObjCMethodDecl) - State.Stack.back().BreakBeforeParameter = true; - - // Find best solution in solution space. - return analyzeSolutionSpace(State, DryRun); - } - -private: - struct CompareLineStatePointers { - bool operator()(LineState *obj1, LineState *obj2) const { - return *obj1 < *obj2; - } - }; - - /// A pair of <penalty, count> that is used to prioritize the BFS on. - /// - /// In case of equal penalties, we want to prefer states that were inserted - /// first. During state generation we make sure that we insert states first - /// that break the line as late as possible. - typedef std::pair<unsigned, unsigned> OrderedPenalty; - - /// An edge in the solution space from \c Previous->State to \c State, - /// inserting a newline dependent on the \c NewLine. - struct StateNode { - StateNode(const LineState &State, bool NewLine, StateNode *Previous) - : State(State), NewLine(NewLine), Previous(Previous) {} - LineState State; - bool NewLine; - StateNode *Previous; - }; - - /// An item in the prioritized BFS search queue. The \c StateNode's - /// \c State has the given \c OrderedPenalty. - typedef std::pair<OrderedPenalty, StateNode *> QueueItem; - - /// The BFS queue type. - typedef std::priority_queue<QueueItem, std::vector<QueueItem>, - std::greater<QueueItem>> - QueueType; - - /// Analyze the entire solution space starting from \p InitialState. - /// - /// This implements a variant of Dijkstra's algorithm on the graph that spans - /// the solution space (\c LineStates are the nodes). The algorithm tries to - /// find the shortest path (the one with lowest penalty) from \p InitialState - /// to a state where all tokens are placed. Returns the penalty. - /// - /// If \p DryRun is \c false, directly applies the changes. - unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun) { - std::set<LineState *, CompareLineStatePointers> Seen; - - // Increasing count of \c StateNode items we have created. This is used to - // create a deterministic order independent of the container. - unsigned Count = 0; - QueueType Queue; - - // Insert start element into queue. - StateNode *Node = - new (Allocator.Allocate()) StateNode(InitialState, false, nullptr); - Queue.push(QueueItem(OrderedPenalty(0, Count), Node)); - ++Count; - - unsigned Penalty = 0; - - // While not empty, take first element and follow edges. - while (!Queue.empty()) { - Penalty = Queue.top().first.first; - StateNode *Node = Queue.top().second; - if (!Node->State.NextToken) { - LLVM_DEBUG(llvm::dbgs() - << "\n---\nPenalty for line: " << Penalty << "\n"); - break; - } - Queue.pop(); - - // Cut off the analysis of certain solutions if the analysis gets too - // complex. See description of IgnoreStackForComparison. - if (Count > 50000) - Node->State.IgnoreStackForComparison = true; - - if (!Seen.insert(&Node->State).second) - // State already examined with lower penalty. - continue; - - FormatDecision LastFormat = Node->State.NextToken->Decision; - if (LastFormat == FD_Unformatted || LastFormat == FD_Continue) - addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue); - if (LastFormat == FD_Unformatted || LastFormat == FD_Break) - addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue); - } - - if (Queue.empty()) { - // We were unable to find a solution, do nothing. - // FIXME: Add diagnostic? - LLVM_DEBUG(llvm::dbgs() << "Could not find a solution.\n"); - return 0; - } - - // Reconstruct the solution. - if (!DryRun) - reconstructPath(InitialState, Queue.top().second); - - LLVM_DEBUG(llvm::dbgs() - << "Total number of analyzed states: " << Count << "\n"); - LLVM_DEBUG(llvm::dbgs() << "---\n"); - - return Penalty; - } - - /// Add the following state to the analysis queue \c Queue. - /// - /// Assume the current state is \p PreviousNode and has been reached with a - /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true. - void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode, - bool NewLine, unsigned *Count, QueueType *Queue) { - if (NewLine && !Indenter->canBreak(PreviousNode->State)) - return; - if (!NewLine && Indenter->mustBreak(PreviousNode->State)) - return; - - StateNode *Node = new (Allocator.Allocate()) - StateNode(PreviousNode->State, NewLine, PreviousNode); - if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty)) - return; - - Penalty += Indenter->addTokenToState(Node->State, NewLine, true); - - Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node)); - ++(*Count); - } - - /// Applies the best formatting by reconstructing the path in the - /// solution space that leads to \c Best. - void reconstructPath(LineState &State, StateNode *Best) { - std::deque<StateNode *> Path; - // We do not need a break before the initial token. - while (Best->Previous) { - Path.push_front(Best); - Best = Best->Previous; - } - for (auto I = Path.begin(), E = Path.end(); I != E; ++I) { - unsigned Penalty = 0; - formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty); - Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false); - - LLVM_DEBUG({ - printLineState((*I)->Previous->State); - if ((*I)->NewLine) { - llvm::dbgs() << "Penalty for placing " - << (*I)->Previous->State.NextToken->Tok.getName() - << " on a new line: " << Penalty << "\n"; - } - }); - } - } - - llvm::SpecificBumpPtrAllocator<StateNode> Allocator; -}; - -} // anonymous namespace - -unsigned -UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines, - bool DryRun, int AdditionalIndent, - bool FixBadIndentation, - unsigned FirstStartColumn, - unsigned NextStartColumn, - unsigned LastStartColumn) { - LineJoiner Joiner(Style, Keywords, Lines); - - // Try to look up already computed penalty in DryRun-mode. - std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned> CacheKey( - &Lines, AdditionalIndent); - auto CacheIt = PenaltyCache.find(CacheKey); - if (DryRun && CacheIt != PenaltyCache.end()) - return CacheIt->second; - - assert(!Lines.empty()); - unsigned Penalty = 0; - LevelIndentTracker IndentTracker(Style, Keywords, Lines[0]->Level, - AdditionalIndent); - const AnnotatedLine *PreviousLine = nullptr; - const AnnotatedLine *NextLine = nullptr; - - // The minimum level of consecutive lines that have been formatted. - unsigned RangeMinLevel = UINT_MAX; - - bool FirstLine = true; - for (const AnnotatedLine *Line = - Joiner.getNextMergedLine(DryRun, IndentTracker); - Line; Line = NextLine, FirstLine = false) { - const AnnotatedLine &TheLine = *Line; - unsigned Indent = IndentTracker.getIndent(); - - // We continue formatting unchanged lines to adjust their indent, e.g. if a - // scope was added. However, we need to carefully stop doing this when we - // exit the scope of affected lines to prevent indenting a the entire - // remaining file if it currently missing a closing brace. - bool PreviousRBrace = - PreviousLine && PreviousLine->startsWith(tok::r_brace); - bool ContinueFormatting = - TheLine.Level > RangeMinLevel || - (TheLine.Level == RangeMinLevel && !PreviousRBrace && - !TheLine.startsWith(tok::r_brace)); - - bool FixIndentation = (FixBadIndentation || ContinueFormatting) && - Indent != TheLine.First->OriginalColumn; - bool ShouldFormat = TheLine.Affected || FixIndentation; - // We cannot format this line; if the reason is that the line had a - // parsing error, remember that. - if (ShouldFormat && TheLine.Type == LT_Invalid && Status) { - Status->FormatComplete = false; - Status->Line = - SourceMgr.getSpellingLineNumber(TheLine.First->Tok.getLocation()); - } - - if (ShouldFormat && TheLine.Type != LT_Invalid) { - if (!DryRun) { - bool LastLine = Line->First->is(tok::eof); - formatFirstToken(TheLine, PreviousLine, Lines, Indent, - LastLine ? LastStartColumn : NextStartColumn + Indent); - } - - NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker); - unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine); - bool FitsIntoOneLine = - TheLine.Last->TotalLength + Indent <= ColumnLimit || - (TheLine.Type == LT_ImportStatement && - (Style.Language != FormatStyle::LK_JavaScript || - !Style.JavaScriptWrapImports)); - if (Style.ColumnLimit == 0) - NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this) - .formatLine(TheLine, NextStartColumn + Indent, - FirstLine ? FirstStartColumn : 0, DryRun); - else if (FitsIntoOneLine) - Penalty += NoLineBreakFormatter(Indenter, Whitespaces, Style, this) - .formatLine(TheLine, NextStartColumn + Indent, - FirstLine ? FirstStartColumn : 0, DryRun); - else - Penalty += OptimizingLineFormatter(Indenter, Whitespaces, Style, this) - .formatLine(TheLine, NextStartColumn + Indent, - FirstLine ? FirstStartColumn : 0, DryRun); - RangeMinLevel = std::min(RangeMinLevel, TheLine.Level); - } else { - // If no token in the current line is affected, we still need to format - // affected children. - if (TheLine.ChildrenAffected) - for (const FormatToken *Tok = TheLine.First; Tok; Tok = Tok->Next) - if (!Tok->Children.empty()) - format(Tok->Children, DryRun); - - // Adapt following lines on the current indent level to the same level - // unless the current \c AnnotatedLine is not at the beginning of a line. - bool StartsNewLine = - TheLine.First->NewlinesBefore > 0 || TheLine.First->IsFirst; - if (StartsNewLine) - IndentTracker.adjustToUnmodifiedLine(TheLine); - if (!DryRun) { - bool ReformatLeadingWhitespace = - StartsNewLine && ((PreviousLine && PreviousLine->Affected) || - TheLine.LeadingEmptyLinesAffected); - // Format the first token. - if (ReformatLeadingWhitespace) - formatFirstToken(TheLine, PreviousLine, Lines, - TheLine.First->OriginalColumn, - TheLine.First->OriginalColumn); - else - Whitespaces->addUntouchableToken(*TheLine.First, - TheLine.InPPDirective); - - // Notify the WhitespaceManager about the unchanged whitespace. - for (FormatToken *Tok = TheLine.First->Next; Tok; Tok = Tok->Next) - Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective); - } - NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker); - RangeMinLevel = UINT_MAX; - } - if (!DryRun) - markFinalized(TheLine.First); - PreviousLine = &TheLine; - } - PenaltyCache[CacheKey] = Penalty; - return Penalty; -} - -void UnwrappedLineFormatter::formatFirstToken( - const AnnotatedLine &Line, const AnnotatedLine *PreviousLine, - const SmallVectorImpl<AnnotatedLine *> &Lines, unsigned Indent, - unsigned NewlineIndent) { - FormatToken &RootToken = *Line.First; - if (RootToken.is(tok::eof)) { - unsigned Newlines = std::min(RootToken.NewlinesBefore, 1u); - unsigned TokenIndent = Newlines ? NewlineIndent : 0; - Whitespaces->replaceWhitespace(RootToken, Newlines, TokenIndent, - TokenIndent); - return; - } - unsigned Newlines = - std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1); - // Remove empty lines before "}" where applicable. - if (RootToken.is(tok::r_brace) && - (!RootToken.Next || - (RootToken.Next->is(tok::semi) && !RootToken.Next->Next)) && - // Do not remove empty lines before namespace closing "}". - !getNamespaceToken(&Line, Lines)) - Newlines = std::min(Newlines, 1u); - // Remove empty lines at the start of nested blocks (lambdas/arrow functions) - if (PreviousLine == nullptr && Line.Level > 0) - Newlines = std::min(Newlines, 1u); - if (Newlines == 0 && !RootToken.IsFirst) - Newlines = 1; - if (RootToken.IsFirst && !RootToken.HasUnescapedNewline) - Newlines = 0; - - // Remove empty lines after "{". - if (!Style.KeepEmptyLinesAtTheStartOfBlocks && PreviousLine && - PreviousLine->Last->is(tok::l_brace) && - !PreviousLine->startsWithNamespace() && - !startsExternCBlock(*PreviousLine)) - Newlines = 1; - - // Insert extra new line before access specifiers. - if (PreviousLine && PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) && - RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1) - ++Newlines; - - // Remove empty lines after access specifiers. - if (PreviousLine && PreviousLine->First->isAccessSpecifier() && - (!PreviousLine->InPPDirective || !RootToken.HasUnescapedNewline)) - Newlines = std::min(1u, Newlines); - - if (Newlines) - Indent = NewlineIndent; - - // Preprocessor directives get indented after the hash, if indented. - if (Line.Type == LT_PreprocessorDirective || Line.Type == LT_ImportStatement) - Indent = 0; - - Whitespaces->replaceWhitespace(RootToken, Newlines, Indent, Indent, - Line.InPPDirective && - !RootToken.HasUnescapedNewline); -} - -unsigned -UnwrappedLineFormatter::getColumnLimit(bool InPPDirective, - const AnnotatedLine *NextLine) const { - // In preprocessor directives reserve two chars for trailing " \" if the - // next line continues the preprocessor directive. - bool ContinuesPPDirective = - InPPDirective && - // If there is no next line, this is likely a child line and the parent - // continues the preprocessor directive. - (!NextLine || - (NextLine->InPPDirective && - // If there is an unescaped newline between this line and the next, the - // next line starts a new preprocessor directive. - !NextLine->First->HasUnescapedNewline)); - return Style.ColumnLimit - (ContinuesPPDirective ? 2 : 0); -} - -} // namespace format -} // namespace clang diff --git a/gnu/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.h b/gnu/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.h deleted file mode 100644 index dac210ea62b..00000000000 --- a/gnu/llvm/tools/clang/lib/Format/UnwrappedLineFormatter.h +++ /dev/null @@ -1,78 +0,0 @@ -//===--- UnwrappedLineFormatter.h - Format C++ code -------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Implements a combinartorial exploration of all the different -/// linebreaks unwrapped lines can be formatted in. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEFORMATTER_H -#define LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEFORMATTER_H - -#include "ContinuationIndenter.h" -#include "clang/Format/Format.h" -#include <map> - -namespace clang { -namespace format { - -class ContinuationIndenter; -class WhitespaceManager; - -class UnwrappedLineFormatter { -public: - UnwrappedLineFormatter(ContinuationIndenter *Indenter, - WhitespaceManager *Whitespaces, - const FormatStyle &Style, - const AdditionalKeywords &Keywords, - const SourceManager &SourceMgr, - FormattingAttemptStatus *Status) - : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style), - Keywords(Keywords), SourceMgr(SourceMgr), Status(Status) {} - - /// Format the current block and return the penalty. - unsigned format(const SmallVectorImpl<AnnotatedLine *> &Lines, - bool DryRun = false, int AdditionalIndent = 0, - bool FixBadIndentation = false, - unsigned FirstStartColumn = 0, - unsigned NextStartColumn = 0, - unsigned LastStartColumn = 0); - -private: - /// Add a new line and the required indent before the first Token - /// of the \c UnwrappedLine if there was no structural parsing error. - void formatFirstToken(const AnnotatedLine &Line, - const AnnotatedLine *PreviousLine, - const SmallVectorImpl<AnnotatedLine *> &Lines, - unsigned Indent, unsigned NewlineIndent); - - /// Returns the column limit for a line, taking into account whether we - /// need an escaped newline due to a continued preprocessor directive. - unsigned getColumnLimit(bool InPPDirective, - const AnnotatedLine *NextLine) const; - - // Cache to store the penalty of formatting a vector of AnnotatedLines - // starting from a specific additional offset. Improves performance if there - // are many nested blocks. - std::map<std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned>, - unsigned> - PenaltyCache; - - ContinuationIndenter *Indenter; - WhitespaceManager *Whitespaces; - const FormatStyle &Style; - const AdditionalKeywords &Keywords; - const SourceManager &SourceMgr; - FormattingAttemptStatus *Status; -}; -} // end namespace format -} // end namespace clang - -#endif // LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEFORMATTER_H diff --git a/gnu/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp b/gnu/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp deleted file mode 100644 index 3cd3c8f9cdf..00000000000 --- a/gnu/llvm/tools/clang/lib/Format/UnwrappedLineParser.cpp +++ /dev/null @@ -1,2678 +0,0 @@ -//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file contains the implementation of the UnwrappedLineParser, -/// which turns a stream of tokens into UnwrappedLines. -/// -//===----------------------------------------------------------------------===// - -#include "UnwrappedLineParser.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" - -#include <algorithm> - -#define DEBUG_TYPE "format-parser" - -namespace clang { -namespace format { - -class FormatTokenSource { -public: - virtual ~FormatTokenSource() {} - virtual FormatToken *getNextToken() = 0; - - virtual unsigned getPosition() = 0; - virtual FormatToken *setPosition(unsigned Position) = 0; -}; - -namespace { - -class ScopedDeclarationState { -public: - ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, - bool MustBeDeclaration) - : Line(Line), Stack(Stack) { - Line.MustBeDeclaration = MustBeDeclaration; - Stack.push_back(MustBeDeclaration); - } - ~ScopedDeclarationState() { - Stack.pop_back(); - if (!Stack.empty()) - Line.MustBeDeclaration = Stack.back(); - else - Line.MustBeDeclaration = true; - } - -private: - UnwrappedLine &Line; - std::vector<bool> &Stack; -}; - -static bool isLineComment(const FormatToken &FormatTok) { - return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*"); -} - -// Checks if \p FormatTok is a line comment that continues the line comment -// \p Previous. The original column of \p MinColumnToken is used to determine -// whether \p FormatTok is indented enough to the right to continue \p Previous. -static bool continuesLineComment(const FormatToken &FormatTok, - const FormatToken *Previous, - const FormatToken *MinColumnToken) { - if (!Previous || !MinColumnToken) - return false; - unsigned MinContinueColumn = - MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1); - return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && - isLineComment(*Previous) && - FormatTok.OriginalColumn >= MinContinueColumn; -} - -class ScopedMacroState : public FormatTokenSource { -public: - ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, - FormatToken *&ResetToken) - : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), - PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), - Token(nullptr), PreviousToken(nullptr) { - FakeEOF.Tok.startToken(); - FakeEOF.Tok.setKind(tok::eof); - TokenSource = this; - Line.Level = 0; - Line.InPPDirective = true; - } - - ~ScopedMacroState() override { - TokenSource = PreviousTokenSource; - ResetToken = Token; - Line.InPPDirective = false; - Line.Level = PreviousLineLevel; - } - - FormatToken *getNextToken() override { - // The \c UnwrappedLineParser guards against this by never calling - // \c getNextToken() after it has encountered the first eof token. - assert(!eof()); - PreviousToken = Token; - Token = PreviousTokenSource->getNextToken(); - if (eof()) - return &FakeEOF; - return Token; - } - - unsigned getPosition() override { return PreviousTokenSource->getPosition(); } - - FormatToken *setPosition(unsigned Position) override { - PreviousToken = nullptr; - Token = PreviousTokenSource->setPosition(Position); - return Token; - } - -private: - bool eof() { - return Token && Token->HasUnescapedNewline && - !continuesLineComment(*Token, PreviousToken, - /*MinColumnToken=*/PreviousToken); - } - - FormatToken FakeEOF; - UnwrappedLine &Line; - FormatTokenSource *&TokenSource; - FormatToken *&ResetToken; - unsigned PreviousLineLevel; - FormatTokenSource *PreviousTokenSource; - - FormatToken *Token; - FormatToken *PreviousToken; -}; - -} // end anonymous namespace - -class ScopedLineState { -public: - ScopedLineState(UnwrappedLineParser &Parser, - bool SwitchToPreprocessorLines = false) - : Parser(Parser), OriginalLines(Parser.CurrentLines) { - if (SwitchToPreprocessorLines) - Parser.CurrentLines = &Parser.PreprocessorDirectives; - else if (!Parser.Line->Tokens.empty()) - Parser.CurrentLines = &Parser.Line->Tokens.back().Children; - PreBlockLine = std::move(Parser.Line); - Parser.Line = llvm::make_unique<UnwrappedLine>(); - Parser.Line->Level = PreBlockLine->Level; - Parser.Line->InPPDirective = PreBlockLine->InPPDirective; - } - - ~ScopedLineState() { - if (!Parser.Line->Tokens.empty()) { - Parser.addUnwrappedLine(); - } - assert(Parser.Line->Tokens.empty()); - Parser.Line = std::move(PreBlockLine); - if (Parser.CurrentLines == &Parser.PreprocessorDirectives) - Parser.MustBreakBeforeNextToken = true; - Parser.CurrentLines = OriginalLines; - } - -private: - UnwrappedLineParser &Parser; - - std::unique_ptr<UnwrappedLine> PreBlockLine; - SmallVectorImpl<UnwrappedLine> *OriginalLines; -}; - -class CompoundStatementIndenter { -public: - CompoundStatementIndenter(UnwrappedLineParser *Parser, - const FormatStyle &Style, unsigned &LineLevel) - : LineLevel(LineLevel), OldLineLevel(LineLevel) { - if (Style.BraceWrapping.AfterControlStatement) - Parser->addUnwrappedLine(); - if (Style.BraceWrapping.IndentBraces) - ++LineLevel; - } - ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } - -private: - unsigned &LineLevel; - unsigned OldLineLevel; -}; - -namespace { - -class IndexedTokenSource : public FormatTokenSource { -public: - IndexedTokenSource(ArrayRef<FormatToken *> Tokens) - : Tokens(Tokens), Position(-1) {} - - FormatToken *getNextToken() override { - ++Position; - return Tokens[Position]; - } - - unsigned getPosition() override { - assert(Position >= 0); - return Position; - } - - FormatToken *setPosition(unsigned P) override { - Position = P; - return Tokens[Position]; - } - - void reset() { Position = -1; } - -private: - ArrayRef<FormatToken *> Tokens; - int Position; -}; - -} // end anonymous namespace - -UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, - const AdditionalKeywords &Keywords, - unsigned FirstStartColumn, - ArrayRef<FormatToken *> Tokens, - UnwrappedLineConsumer &Callback) - : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), - CurrentLines(&Lines), Style(Style), Keywords(Keywords), - CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), - Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1), - IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None - ? IG_Rejected - : IG_Inited), - IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {} - -void UnwrappedLineParser::reset() { - PPBranchLevel = -1; - IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None - ? IG_Rejected - : IG_Inited; - IncludeGuardToken = nullptr; - Line.reset(new UnwrappedLine); - CommentsBeforeNextToken.clear(); - FormatTok = nullptr; - MustBreakBeforeNextToken = false; - PreprocessorDirectives.clear(); - CurrentLines = &Lines; - DeclarationScopeStack.clear(); - PPStack.clear(); - Line->FirstStartColumn = FirstStartColumn; -} - -void UnwrappedLineParser::parse() { - IndexedTokenSource TokenSource(AllTokens); - Line->FirstStartColumn = FirstStartColumn; - do { - LLVM_DEBUG(llvm::dbgs() << "----\n"); - reset(); - Tokens = &TokenSource; - TokenSource.reset(); - - readToken(); - parseFile(); - - // If we found an include guard then all preprocessor directives (other than - // the guard) are over-indented by one. - if (IncludeGuard == IG_Found) - for (auto &Line : Lines) - if (Line.InPPDirective && Line.Level > 0) - --Line.Level; - - // Create line with eof token. - pushToken(FormatTok); - addUnwrappedLine(); - - for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(), - E = Lines.end(); - I != E; ++I) { - Callback.consumeUnwrappedLine(*I); - } - Callback.finishRun(); - Lines.clear(); - while (!PPLevelBranchIndex.empty() && - PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { - PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); - PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); - } - if (!PPLevelBranchIndex.empty()) { - ++PPLevelBranchIndex.back(); - assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); - assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); - } - } while (!PPLevelBranchIndex.empty()); -} - -void UnwrappedLineParser::parseFile() { - // The top-level context in a file always has declarations, except for pre- - // processor directives and JavaScript files. - bool MustBeDeclaration = - !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript; - ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, - MustBeDeclaration); - if (Style.Language == FormatStyle::LK_TextProto) - parseBracedList(); - else - parseLevel(/*HasOpeningBrace=*/false); - // Make sure to format the remaining tokens. - // - // LK_TextProto is special since its top-level is parsed as the body of a - // braced list, which does not necessarily have natural line separators such - // as a semicolon. Comments after the last entry that have been determined to - // not belong to that line, as in: - // key: value - // // endfile comment - // do not have a chance to be put on a line of their own until this point. - // Here we add this newline before end-of-file comments. - if (Style.Language == FormatStyle::LK_TextProto && - !CommentsBeforeNextToken.empty()) - addUnwrappedLine(); - flushComments(true); - addUnwrappedLine(); -} - -void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { - bool SwitchLabelEncountered = false; - do { - tok::TokenKind kind = FormatTok->Tok.getKind(); - if (FormatTok->Type == TT_MacroBlockBegin) { - kind = tok::l_brace; - } else if (FormatTok->Type == TT_MacroBlockEnd) { - kind = tok::r_brace; - } - - switch (kind) { - case tok::comment: - nextToken(); - addUnwrappedLine(); - break; - case tok::l_brace: - // FIXME: Add parameter whether this can happen - if this happens, we must - // be in a non-declaration context. - if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList()) - continue; - parseBlock(/*MustBeDeclaration=*/false); - addUnwrappedLine(); - break; - case tok::r_brace: - if (HasOpeningBrace) - return; - nextToken(); - addUnwrappedLine(); - break; - case tok::kw_default: { - unsigned StoredPosition = Tokens->getPosition(); - FormatToken *Next; - do { - Next = Tokens->getNextToken(); - } while (Next && Next->is(tok::comment)); - FormatTok = Tokens->setPosition(StoredPosition); - if (Next && Next->isNot(tok::colon)) { - // default not followed by ':' is not a case label; treat it like - // an identifier. - parseStructuralElement(); - break; - } - // Else, if it is 'default:', fall through to the case handling. - LLVM_FALLTHROUGH; - } - case tok::kw_case: - if (Style.Language == FormatStyle::LK_JavaScript && - Line->MustBeDeclaration) { - // A 'case: string' style field declaration. - parseStructuralElement(); - break; - } - if (!SwitchLabelEncountered && - (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) - ++Line->Level; - SwitchLabelEncountered = true; - parseStructuralElement(); - break; - default: - parseStructuralElement(); - break; - } - } while (!eof()); -} - -void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { - // We'll parse forward through the tokens until we hit - // a closing brace or eof - note that getNextToken() will - // parse macros, so this will magically work inside macro - // definitions, too. - unsigned StoredPosition = Tokens->getPosition(); - FormatToken *Tok = FormatTok; - const FormatToken *PrevTok = Tok->Previous; - // Keep a stack of positions of lbrace tokens. We will - // update information about whether an lbrace starts a - // braced init list or a different block during the loop. - SmallVector<FormatToken *, 8> LBraceStack; - assert(Tok->Tok.is(tok::l_brace)); - do { - // Get next non-comment token. - FormatToken *NextTok; - unsigned ReadTokens = 0; - do { - NextTok = Tokens->getNextToken(); - ++ReadTokens; - } while (NextTok->is(tok::comment)); - - switch (Tok->Tok.getKind()) { - case tok::l_brace: - if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) { - if (PrevTok->isOneOf(tok::colon, tok::less)) - // A ':' indicates this code is in a type, or a braced list - // following a label in an object literal ({a: {b: 1}}). - // A '<' could be an object used in a comparison, but that is nonsense - // code (can never return true), so more likely it is a generic type - // argument (`X<{a: string; b: number}>`). - // The code below could be confused by semicolons between the - // individual members in a type member list, which would normally - // trigger BK_Block. In both cases, this must be parsed as an inline - // braced init. - Tok->BlockKind = BK_BracedInit; - else if (PrevTok->is(tok::r_paren)) - // `) { }` can only occur in function or method declarations in JS. - Tok->BlockKind = BK_Block; - } else { - Tok->BlockKind = BK_Unknown; - } - LBraceStack.push_back(Tok); - break; - case tok::r_brace: - if (LBraceStack.empty()) - break; - if (LBraceStack.back()->BlockKind == BK_Unknown) { - bool ProbablyBracedList = false; - if (Style.Language == FormatStyle::LK_Proto) { - ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); - } else { - // Using OriginalColumn to distinguish between ObjC methods and - // binary operators is a bit hacky. - bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && - NextTok->OriginalColumn == 0; - - // If there is a comma, semicolon or right paren after the closing - // brace, we assume this is a braced initializer list. Note that - // regardless how we mark inner braces here, we will overwrite the - // BlockKind later if we parse a braced list (where all blocks - // inside are by default braced lists), or when we explicitly detect - // blocks (for example while parsing lambdas). - // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a - // braced list in JS. - ProbablyBracedList = - (Style.Language == FormatStyle::LK_JavaScript && - NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in, - Keywords.kw_as)) || - (Style.isCpp() && NextTok->is(tok::l_paren)) || - NextTok->isOneOf(tok::comma, tok::period, tok::colon, - tok::r_paren, tok::r_square, tok::l_brace, - tok::ellipsis) || - (NextTok->is(tok::identifier) && - !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) || - (NextTok->is(tok::semi) && - (!ExpectClassBody || LBraceStack.size() != 1)) || - (NextTok->isBinaryOperator() && !NextIsObjCMethod); - if (NextTok->is(tok::l_square)) { - // We can have an array subscript after a braced init - // list, but C++11 attributes are expected after blocks. - NextTok = Tokens->getNextToken(); - ++ReadTokens; - ProbablyBracedList = NextTok->isNot(tok::l_square); - } - } - if (ProbablyBracedList) { - Tok->BlockKind = BK_BracedInit; - LBraceStack.back()->BlockKind = BK_BracedInit; - } else { - Tok->BlockKind = BK_Block; - LBraceStack.back()->BlockKind = BK_Block; - } - } - LBraceStack.pop_back(); - break; - case tok::identifier: - if (!Tok->is(TT_StatementMacro)) - break; - LLVM_FALLTHROUGH; - case tok::at: - case tok::semi: - case tok::kw_if: - case tok::kw_while: - case tok::kw_for: - case tok::kw_switch: - case tok::kw_try: - case tok::kw___try: - if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown) - LBraceStack.back()->BlockKind = BK_Block; - break; - default: - break; - } - PrevTok = Tok; - Tok = NextTok; - } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); - - // Assume other blocks for all unclosed opening braces. - for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { - if (LBraceStack[i]->BlockKind == BK_Unknown) - LBraceStack[i]->BlockKind = BK_Block; - } - - FormatTok = Tokens->setPosition(StoredPosition); -} - -template <class T> -static inline void hash_combine(std::size_t &seed, const T &v) { - std::hash<T> hasher; - seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); -} - -size_t UnwrappedLineParser::computePPHash() const { - size_t h = 0; - for (const auto &i : PPStack) { - hash_combine(h, size_t(i.Kind)); - hash_combine(h, i.Line); - } - return h; -} - -void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, - bool MunchSemi) { - assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) && - "'{' or macro block token expected"); - const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin); - FormatTok->BlockKind = BK_Block; - - size_t PPStartHash = computePPHash(); - - unsigned InitialLevel = Line->Level; - nextToken(/*LevelDifference=*/AddLevel ? 1 : 0); - - if (MacroBlock && FormatTok->is(tok::l_paren)) - parseParens(); - - size_t NbPreprocessorDirectives = - CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; - addUnwrappedLine(); - size_t OpeningLineIndex = - CurrentLines->empty() - ? (UnwrappedLine::kInvalidIndex) - : (CurrentLines->size() - 1 - NbPreprocessorDirectives); - - ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, - MustBeDeclaration); - if (AddLevel) - ++Line->Level; - parseLevel(/*HasOpeningBrace=*/true); - - if (eof()) - return; - - if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd) - : !FormatTok->is(tok::r_brace)) { - Line->Level = InitialLevel; - FormatTok->BlockKind = BK_Block; - return; - } - - size_t PPEndHash = computePPHash(); - - // Munch the closing brace. - nextToken(/*LevelDifference=*/AddLevel ? -1 : 0); - - if (MacroBlock && FormatTok->is(tok::l_paren)) - parseParens(); - - if (MunchSemi && FormatTok->Tok.is(tok::semi)) - nextToken(); - Line->Level = InitialLevel; - - if (PPStartHash == PPEndHash) { - Line->MatchingOpeningBlockLineIndex = OpeningLineIndex; - if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) { - // Update the opening line to add the forward reference as well - (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex = - CurrentLines->size() - 1; - } - } -} - -static bool isGoogScope(const UnwrappedLine &Line) { - // FIXME: Closure-library specific stuff should not be hard-coded but be - // configurable. - if (Line.Tokens.size() < 4) - return false; - auto I = Line.Tokens.begin(); - if (I->Tok->TokenText != "goog") - return false; - ++I; - if (I->Tok->isNot(tok::period)) - return false; - ++I; - if (I->Tok->TokenText != "scope") - return false; - ++I; - return I->Tok->is(tok::l_paren); -} - -static bool isIIFE(const UnwrappedLine &Line, - const AdditionalKeywords &Keywords) { - // Look for the start of an immediately invoked anonymous function. - // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression - // This is commonly done in JavaScript to create a new, anonymous scope. - // Example: (function() { ... })() - if (Line.Tokens.size() < 3) - return false; - auto I = Line.Tokens.begin(); - if (I->Tok->isNot(tok::l_paren)) - return false; - ++I; - if (I->Tok->isNot(Keywords.kw_function)) - return false; - ++I; - return I->Tok->is(tok::l_paren); -} - -static bool ShouldBreakBeforeBrace(const FormatStyle &Style, - const FormatToken &InitialToken) { - if (InitialToken.is(tok::kw_namespace)) - return Style.BraceWrapping.AfterNamespace; - if (InitialToken.is(tok::kw_class)) - return Style.BraceWrapping.AfterClass; - if (InitialToken.is(tok::kw_union)) - return Style.BraceWrapping.AfterUnion; - if (InitialToken.is(tok::kw_struct)) - return Style.BraceWrapping.AfterStruct; - return false; -} - -void UnwrappedLineParser::parseChildBlock() { - FormatTok->BlockKind = BK_Block; - nextToken(); - { - bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript && - (isGoogScope(*Line) || isIIFE(*Line, Keywords))); - ScopedLineState LineState(*this); - ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, - /*MustBeDeclaration=*/false); - Line->Level += SkipIndent ? 0 : 1; - parseLevel(/*HasOpeningBrace=*/true); - flushComments(isOnNewLine(*FormatTok)); - Line->Level -= SkipIndent ? 0 : 1; - } - nextToken(); -} - -void UnwrappedLineParser::parsePPDirective() { - assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); - ScopedMacroState MacroState(*Line, Tokens, FormatTok); - nextToken(); - - if (!FormatTok->Tok.getIdentifierInfo()) { - parsePPUnknown(); - return; - } - - switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { - case tok::pp_define: - parsePPDefine(); - return; - case tok::pp_if: - parsePPIf(/*IfDef=*/false); - break; - case tok::pp_ifdef: - case tok::pp_ifndef: - parsePPIf(/*IfDef=*/true); - break; - case tok::pp_else: - parsePPElse(); - break; - case tok::pp_elif: - parsePPElIf(); - break; - case tok::pp_endif: - parsePPEndIf(); - break; - default: - parsePPUnknown(); - break; - } -} - -void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { - size_t Line = CurrentLines->size(); - if (CurrentLines == &PreprocessorDirectives) - Line += Lines.size(); - - if (Unreachable || - (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) - PPStack.push_back({PP_Unreachable, Line}); - else - PPStack.push_back({PP_Conditional, Line}); -} - -void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { - ++PPBranchLevel; - assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); - if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { - PPLevelBranchIndex.push_back(0); - PPLevelBranchCount.push_back(0); - } - PPChainBranchIndex.push(0); - bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; - conditionalCompilationCondition(Unreachable || Skip); -} - -void UnwrappedLineParser::conditionalCompilationAlternative() { - if (!PPStack.empty()) - PPStack.pop_back(); - assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); - if (!PPChainBranchIndex.empty()) - ++PPChainBranchIndex.top(); - conditionalCompilationCondition( - PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && - PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); -} - -void UnwrappedLineParser::conditionalCompilationEnd() { - assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); - if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { - if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { - PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; - } - } - // Guard against #endif's without #if. - if (PPBranchLevel > -1) - --PPBranchLevel; - if (!PPChainBranchIndex.empty()) - PPChainBranchIndex.pop(); - if (!PPStack.empty()) - PPStack.pop_back(); -} - -void UnwrappedLineParser::parsePPIf(bool IfDef) { - bool IfNDef = FormatTok->is(tok::pp_ifndef); - nextToken(); - bool Unreachable = false; - if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0")) - Unreachable = true; - if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG") - Unreachable = true; - conditionalCompilationStart(Unreachable); - FormatToken *IfCondition = FormatTok; - // If there's a #ifndef on the first line, and the only lines before it are - // comments, it could be an include guard. - bool MaybeIncludeGuard = IfNDef; - if (IncludeGuard == IG_Inited && MaybeIncludeGuard) - for (auto &Line : Lines) { - if (!Line.Tokens.front().Tok->is(tok::comment)) { - MaybeIncludeGuard = false; - IncludeGuard = IG_Rejected; - break; - } - } - --PPBranchLevel; - parsePPUnknown(); - ++PPBranchLevel; - if (IncludeGuard == IG_Inited && MaybeIncludeGuard) { - IncludeGuard = IG_IfNdefed; - IncludeGuardToken = IfCondition; - } -} - -void UnwrappedLineParser::parsePPElse() { - // If a potential include guard has an #else, it's not an include guard. - if (IncludeGuard == IG_Defined && PPBranchLevel == 0) - IncludeGuard = IG_Rejected; - conditionalCompilationAlternative(); - if (PPBranchLevel > -1) - --PPBranchLevel; - parsePPUnknown(); - ++PPBranchLevel; -} - -void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } - -void UnwrappedLineParser::parsePPEndIf() { - conditionalCompilationEnd(); - parsePPUnknown(); - // If the #endif of a potential include guard is the last thing in the file, - // then we found an include guard. - unsigned TokenPosition = Tokens->getPosition(); - FormatToken *PeekNext = AllTokens[TokenPosition]; - if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && - PeekNext->is(tok::eof) && - Style.IndentPPDirectives != FormatStyle::PPDIS_None) - IncludeGuard = IG_Found; -} - -void UnwrappedLineParser::parsePPDefine() { - nextToken(); - - if (FormatTok->Tok.getKind() != tok::identifier) { - IncludeGuard = IG_Rejected; - IncludeGuardToken = nullptr; - parsePPUnknown(); - return; - } - - if (IncludeGuard == IG_IfNdefed && - IncludeGuardToken->TokenText == FormatTok->TokenText) { - IncludeGuard = IG_Defined; - IncludeGuardToken = nullptr; - for (auto &Line : Lines) { - if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) { - IncludeGuard = IG_Rejected; - break; - } - } - } - - nextToken(); - if (FormatTok->Tok.getKind() == tok::l_paren && - FormatTok->WhitespaceRange.getBegin() == - FormatTok->WhitespaceRange.getEnd()) { - parseParens(); - } - if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash) - Line->Level += PPBranchLevel + 1; - addUnwrappedLine(); - ++Line->Level; - - // Errors during a preprocessor directive can only affect the layout of the - // preprocessor directive, and thus we ignore them. An alternative approach - // would be to use the same approach we use on the file level (no - // re-indentation if there was a structural error) within the macro - // definition. - parseFile(); -} - -void UnwrappedLineParser::parsePPUnknown() { - do { - nextToken(); - } while (!eof()); - if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash) - Line->Level += PPBranchLevel + 1; - addUnwrappedLine(); -} - -// Here we blacklist certain tokens that are not usually the first token in an -// unwrapped line. This is used in attempt to distinguish macro calls without -// trailing semicolons from other constructs split to several lines. -static bool tokenCanStartNewLine(const clang::Token &Tok) { - // Semicolon can be a null-statement, l_square can be a start of a macro or - // a C++11 attribute, but this doesn't seem to be common. - return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && - Tok.isNot(tok::l_square) && - // Tokens that can only be used as binary operators and a part of - // overloaded operator names. - Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && - Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && - Tok.isNot(tok::less) && Tok.isNot(tok::greater) && - Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && - Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && - Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && - Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && - Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && - Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && - Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && - Tok.isNot(tok::lesslessequal) && - // Colon is used in labels, base class lists, initializer lists, - // range-based for loops, ternary operator, but should never be the - // first token in an unwrapped line. - Tok.isNot(tok::colon) && - // 'noexcept' is a trailing annotation. - Tok.isNot(tok::kw_noexcept); -} - -static bool mustBeJSIdent(const AdditionalKeywords &Keywords, - const FormatToken *FormatTok) { - // FIXME: This returns true for C/C++ keywords like 'struct'. - return FormatTok->is(tok::identifier) && - (FormatTok->Tok.getIdentifierInfo() == nullptr || - !FormatTok->isOneOf( - Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async, - Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally, - Keywords.kw_function, Keywords.kw_import, Keywords.kw_is, - Keywords.kw_let, Keywords.kw_var, tok::kw_const, - Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements, - Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws, - Keywords.kw_from)); -} - -static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords, - const FormatToken *FormatTok) { - return FormatTok->Tok.isLiteral() || - FormatTok->isOneOf(tok::kw_true, tok::kw_false) || - mustBeJSIdent(Keywords, FormatTok); -} - -// isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement -// when encountered after a value (see mustBeJSIdentOrValue). -static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords, - const FormatToken *FormatTok) { - return FormatTok->isOneOf( - tok::kw_return, Keywords.kw_yield, - // conditionals - tok::kw_if, tok::kw_else, - // loops - tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break, - // switch/case - tok::kw_switch, tok::kw_case, - // exceptions - tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally, - // declaration - tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let, - Keywords.kw_async, Keywords.kw_function, - // import/export - Keywords.kw_import, tok::kw_export); -} - -// readTokenWithJavaScriptASI reads the next token and terminates the current -// line if JavaScript Automatic Semicolon Insertion must -// happen between the current token and the next token. -// -// This method is conservative - it cannot cover all edge cases of JavaScript, -// but only aims to correctly handle certain well known cases. It *must not* -// return true in speculative cases. -void UnwrappedLineParser::readTokenWithJavaScriptASI() { - FormatToken *Previous = FormatTok; - readToken(); - FormatToken *Next = FormatTok; - - bool IsOnSameLine = - CommentsBeforeNextToken.empty() - ? Next->NewlinesBefore == 0 - : CommentsBeforeNextToken.front()->NewlinesBefore == 0; - if (IsOnSameLine) - return; - - bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous); - bool PreviousStartsTemplateExpr = - Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${"); - if (PreviousMustBeValue || Previous->is(tok::r_paren)) { - // If the line contains an '@' sign, the previous token might be an - // annotation, which can precede another identifier/value. - bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(), - [](UnwrappedLineNode &LineNode) { - return LineNode.Tok->is(tok::at); - }) != Line->Tokens.end(); - if (HasAt) - return; - } - if (Next->is(tok::exclaim) && PreviousMustBeValue) - return addUnwrappedLine(); - bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next); - bool NextEndsTemplateExpr = - Next->is(TT_TemplateString) && Next->TokenText.startswith("}"); - if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr && - (PreviousMustBeValue || - Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus, - tok::minusminus))) - return addUnwrappedLine(); - if ((PreviousMustBeValue || Previous->is(tok::r_paren)) && - isJSDeclOrStmt(Keywords, Next)) - return addUnwrappedLine(); -} - -void UnwrappedLineParser::parseStructuralElement() { - assert(!FormatTok->is(tok::l_brace)); - if (Style.Language == FormatStyle::LK_TableGen && - FormatTok->is(tok::pp_include)) { - nextToken(); - if (FormatTok->is(tok::string_literal)) - nextToken(); - addUnwrappedLine(); - return; - } - switch (FormatTok->Tok.getKind()) { - case tok::kw_asm: - nextToken(); - if (FormatTok->is(tok::l_brace)) { - FormatTok->Type = TT_InlineASMBrace; - nextToken(); - while (FormatTok && FormatTok->isNot(tok::eof)) { - if (FormatTok->is(tok::r_brace)) { - FormatTok->Type = TT_InlineASMBrace; - nextToken(); - addUnwrappedLine(); - break; - } - FormatTok->Finalized = true; - nextToken(); - } - } - break; - case tok::kw_namespace: - parseNamespace(); - return; - case tok::kw_public: - case tok::kw_protected: - case tok::kw_private: - if (Style.Language == FormatStyle::LK_Java || - Style.Language == FormatStyle::LK_JavaScript) - nextToken(); - else - parseAccessSpecifier(); - return; - case tok::kw_if: - parseIfThenElse(); - return; - case tok::kw_for: - case tok::kw_while: - parseForOrWhileLoop(); - return; - case tok::kw_do: - parseDoWhile(); - return; - case tok::kw_switch: - if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) - // 'switch: string' field declaration. - break; - parseSwitch(); - return; - case tok::kw_default: - if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) - // 'default: string' field declaration. - break; - nextToken(); - if (FormatTok->is(tok::colon)) { - parseLabel(); - return; - } - // e.g. "default void f() {}" in a Java interface. - break; - case tok::kw_case: - if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) - // 'case: string' field declaration. - break; - parseCaseLabel(); - return; - case tok::kw_try: - case tok::kw___try: - parseTryCatch(); - return; - case tok::kw_extern: - nextToken(); - if (FormatTok->Tok.is(tok::string_literal)) { - nextToken(); - if (FormatTok->Tok.is(tok::l_brace)) { - if (Style.BraceWrapping.AfterExternBlock) { - addUnwrappedLine(); - parseBlock(/*MustBeDeclaration=*/true); - } else { - parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false); - } - addUnwrappedLine(); - return; - } - } - break; - case tok::kw_export: - if (Style.Language == FormatStyle::LK_JavaScript) { - parseJavaScriptEs6ImportExport(); - return; - } - if (!Style.isCpp()) - break; - // Handle C++ "(inline|export) namespace". - LLVM_FALLTHROUGH; - case tok::kw_inline: - nextToken(); - if (FormatTok->Tok.is(tok::kw_namespace)) { - parseNamespace(); - return; - } - break; - case tok::identifier: - if (FormatTok->is(TT_ForEachMacro)) { - parseForOrWhileLoop(); - return; - } - if (FormatTok->is(TT_MacroBlockBegin)) { - parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true, - /*MunchSemi=*/false); - return; - } - if (FormatTok->is(Keywords.kw_import)) { - if (Style.Language == FormatStyle::LK_JavaScript) { - parseJavaScriptEs6ImportExport(); - return; - } - if (Style.Language == FormatStyle::LK_Proto) { - nextToken(); - if (FormatTok->is(tok::kw_public)) - nextToken(); - if (!FormatTok->is(tok::string_literal)) - return; - nextToken(); - if (FormatTok->is(tok::semi)) - nextToken(); - addUnwrappedLine(); - return; - } - } - if (Style.isCpp() && - FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals, - Keywords.kw_slots, Keywords.kw_qslots)) { - nextToken(); - if (FormatTok->is(tok::colon)) { - nextToken(); - addUnwrappedLine(); - return; - } - } - if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { - parseStatementMacro(); - return; - } - // In all other cases, parse the declaration. - break; - default: - break; - } - do { - const FormatToken *Previous = FormatTok->Previous; - switch (FormatTok->Tok.getKind()) { - case tok::at: - nextToken(); - if (FormatTok->Tok.is(tok::l_brace)) { - nextToken(); - parseBracedList(); - break; - } else if (Style.Language == FormatStyle::LK_Java && - FormatTok->is(Keywords.kw_interface)) { - nextToken(); - break; - } - switch (FormatTok->Tok.getObjCKeywordID()) { - case tok::objc_public: - case tok::objc_protected: - case tok::objc_package: - case tok::objc_private: - return parseAccessSpecifier(); - case tok::objc_interface: - case tok::objc_implementation: - return parseObjCInterfaceOrImplementation(); - case tok::objc_protocol: - if (parseObjCProtocol()) - return; - break; - case tok::objc_end: - return; // Handled by the caller. - case tok::objc_optional: - case tok::objc_required: - nextToken(); - addUnwrappedLine(); - return; - case tok::objc_autoreleasepool: - nextToken(); - if (FormatTok->Tok.is(tok::l_brace)) { - if (Style.BraceWrapping.AfterControlStatement) - addUnwrappedLine(); - parseBlock(/*MustBeDeclaration=*/false); - } - addUnwrappedLine(); - return; - case tok::objc_synchronized: - nextToken(); - if (FormatTok->Tok.is(tok::l_paren)) - // Skip synchronization object - parseParens(); - if (FormatTok->Tok.is(tok::l_brace)) { - if (Style.BraceWrapping.AfterControlStatement) - addUnwrappedLine(); - parseBlock(/*MustBeDeclaration=*/false); - } - addUnwrappedLine(); - return; - case tok::objc_try: - // This branch isn't strictly necessary (the kw_try case below would - // do this too after the tok::at is parsed above). But be explicit. - parseTryCatch(); - return; - default: - break; - } - break; - case tok::kw_enum: - // Ignore if this is part of "template <enum ...". - if (Previous && Previous->is(tok::less)) { - nextToken(); - break; - } - - // parseEnum falls through and does not yet add an unwrapped line as an - // enum definition can start a structural element. - if (!parseEnum()) - break; - // This only applies for C++. - if (!Style.isCpp()) { - addUnwrappedLine(); - return; - } - break; - case tok::kw_typedef: - nextToken(); - if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, - Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS)) - parseEnum(); - break; - case tok::kw_struct: - case tok::kw_union: - case tok::kw_class: - // parseRecord falls through and does not yet add an unwrapped line as a - // record declaration or definition can start a structural element. - parseRecord(); - // This does not apply for Java and JavaScript. - if (Style.Language == FormatStyle::LK_Java || - Style.Language == FormatStyle::LK_JavaScript) { - if (FormatTok->is(tok::semi)) - nextToken(); - addUnwrappedLine(); - return; - } - break; - case tok::period: - nextToken(); - // In Java, classes have an implicit static member "class". - if (Style.Language == FormatStyle::LK_Java && FormatTok && - FormatTok->is(tok::kw_class)) - nextToken(); - if (Style.Language == FormatStyle::LK_JavaScript && FormatTok && - FormatTok->Tok.getIdentifierInfo()) - // JavaScript only has pseudo keywords, all keywords are allowed to - // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6 - nextToken(); - break; - case tok::semi: - nextToken(); - addUnwrappedLine(); - return; - case tok::r_brace: - addUnwrappedLine(); - return; - case tok::l_paren: - parseParens(); - break; - case tok::kw_operator: - nextToken(); - if (FormatTok->isBinaryOperator()) - nextToken(); - break; - case tok::caret: - nextToken(); - if (FormatTok->Tok.isAnyIdentifier() || - FormatTok->isSimpleTypeSpecifier()) - nextToken(); - if (FormatTok->is(tok::l_paren)) - parseParens(); - if (FormatTok->is(tok::l_brace)) - parseChildBlock(); - break; - case tok::l_brace: - if (!tryToParseBracedList()) { - // A block outside of parentheses must be the last part of a - // structural element. - // FIXME: Figure out cases where this is not true, and add projections - // for them (the one we know is missing are lambdas). - if (Style.BraceWrapping.AfterFunction) - addUnwrappedLine(); - FormatTok->Type = TT_FunctionLBrace; - parseBlock(/*MustBeDeclaration=*/false); - addUnwrappedLine(); - return; - } - // Otherwise this was a braced init list, and the structural - // element continues. - break; - case tok::kw_try: - // We arrive here when parsing function-try blocks. - if (Style.BraceWrapping.AfterFunction) - addUnwrappedLine(); - parseTryCatch(); - return; - case tok::identifier: { - if (FormatTok->is(TT_MacroBlockEnd)) { - addUnwrappedLine(); - return; - } - - // Function declarations (as opposed to function expressions) are parsed - // on their own unwrapped line by continuing this loop. Function - // expressions (functions that are not on their own line) must not create - // a new unwrapped line, so they are special cased below. - size_t TokenCount = Line->Tokens.size(); - if (Style.Language == FormatStyle::LK_JavaScript && - FormatTok->is(Keywords.kw_function) && - (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is( - Keywords.kw_async)))) { - tryToParseJSFunction(); - break; - } - if ((Style.Language == FormatStyle::LK_JavaScript || - Style.Language == FormatStyle::LK_Java) && - FormatTok->is(Keywords.kw_interface)) { - if (Style.Language == FormatStyle::LK_JavaScript) { - // In JavaScript/TypeScript, "interface" can be used as a standalone - // identifier, e.g. in `var interface = 1;`. If "interface" is - // followed by another identifier, it is very like to be an actual - // interface declaration. - unsigned StoredPosition = Tokens->getPosition(); - FormatToken *Next = Tokens->getNextToken(); - FormatTok = Tokens->setPosition(StoredPosition); - if (Next && !mustBeJSIdent(Keywords, Next)) { - nextToken(); - break; - } - } - parseRecord(); - addUnwrappedLine(); - return; - } - - if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) { - parseStatementMacro(); - return; - } - - // See if the following token should start a new unwrapped line. - StringRef Text = FormatTok->TokenText; - nextToken(); - if (Line->Tokens.size() == 1 && - // JS doesn't have macros, and within classes colons indicate fields, - // not labels. - Style.Language != FormatStyle::LK_JavaScript) { - if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) { - Line->Tokens.begin()->Tok->MustBreakBefore = true; - parseLabel(); - return; - } - // Recognize function-like macro usages without trailing semicolon as - // well as free-standing macros like Q_OBJECT. - bool FunctionLike = FormatTok->is(tok::l_paren); - if (FunctionLike) - parseParens(); - - bool FollowedByNewline = - CommentsBeforeNextToken.empty() - ? FormatTok->NewlinesBefore > 0 - : CommentsBeforeNextToken.front()->NewlinesBefore > 0; - - if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && - tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) { - addUnwrappedLine(); - return; - } - } - break; - } - case tok::equal: - // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType - // TT_JsFatArrow. The always start an expression or a child block if - // followed by a curly. - if (FormatTok->is(TT_JsFatArrow)) { - nextToken(); - if (FormatTok->is(tok::l_brace)) - parseChildBlock(); - break; - } - - nextToken(); - if (FormatTok->Tok.is(tok::l_brace)) { - nextToken(); - parseBracedList(); - } else if (Style.Language == FormatStyle::LK_Proto && - FormatTok->Tok.is(tok::less)) { - nextToken(); - parseBracedList(/*ContinueOnSemicolons=*/false, - /*ClosingBraceKind=*/tok::greater); - } - break; - case tok::l_square: - parseSquare(); - break; - case tok::kw_new: - parseNew(); - break; - default: - nextToken(); - break; - } - } while (!eof()); -} - -bool UnwrappedLineParser::tryToParseLambda() { - if (!Style.isCpp()) { - nextToken(); - return false; - } - assert(FormatTok->is(tok::l_square)); - FormatToken &LSquare = *FormatTok; - if (!tryToParseLambdaIntroducer()) - return false; - - while (FormatTok->isNot(tok::l_brace)) { - if (FormatTok->isSimpleTypeSpecifier()) { - nextToken(); - continue; - } - switch (FormatTok->Tok.getKind()) { - case tok::l_brace: - break; - case tok::l_paren: - parseParens(); - break; - case tok::amp: - case tok::star: - case tok::kw_const: - case tok::comma: - case tok::less: - case tok::greater: - case tok::identifier: - case tok::numeric_constant: - case tok::coloncolon: - case tok::kw_mutable: - nextToken(); - break; - case tok::arrow: - FormatTok->Type = TT_LambdaArrow; - nextToken(); - break; - default: - return true; - } - } - LSquare.Type = TT_LambdaLSquare; - parseChildBlock(); - return true; -} - -bool UnwrappedLineParser::tryToParseLambdaIntroducer() { - const FormatToken *Previous = FormatTok->Previous; - if (Previous && - (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new, - tok::kw_delete, tok::l_square) || - FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() || - Previous->isSimpleTypeSpecifier())) { - nextToken(); - return false; - } - nextToken(); - if (FormatTok->is(tok::l_square)) { - return false; - } - parseSquare(/*LambdaIntroducer=*/true); - return true; -} - -void UnwrappedLineParser::tryToParseJSFunction() { - assert(FormatTok->is(Keywords.kw_function) || - FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)); - if (FormatTok->is(Keywords.kw_async)) - nextToken(); - // Consume "function". - nextToken(); - - // Consume * (generator function). Treat it like C++'s overloaded operators. - if (FormatTok->is(tok::star)) { - FormatTok->Type = TT_OverloadedOperator; - nextToken(); - } - - // Consume function name. - if (FormatTok->is(tok::identifier)) - nextToken(); - - if (FormatTok->isNot(tok::l_paren)) - return; - - // Parse formal parameter list. - parseParens(); - - if (FormatTok->is(tok::colon)) { - // Parse a type definition. - nextToken(); - - // Eat the type declaration. For braced inline object types, balance braces, - // otherwise just parse until finding an l_brace for the function body. - if (FormatTok->is(tok::l_brace)) - tryToParseBracedList(); - else - while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof()) - nextToken(); - } - - if (FormatTok->is(tok::semi)) - return; - - parseChildBlock(); -} - -bool UnwrappedLineParser::tryToParseBracedList() { - if (FormatTok->BlockKind == BK_Unknown) - calculateBraceTypes(); - assert(FormatTok->BlockKind != BK_Unknown); - if (FormatTok->BlockKind == BK_Block) - return false; - nextToken(); - parseBracedList(); - return true; -} - -bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons, - tok::TokenKind ClosingBraceKind) { - bool HasError = false; - - // FIXME: Once we have an expression parser in the UnwrappedLineParser, - // replace this by using parseAssigmentExpression() inside. - do { - if (Style.Language == FormatStyle::LK_JavaScript) { - if (FormatTok->is(Keywords.kw_function) || - FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) { - tryToParseJSFunction(); - continue; - } - if (FormatTok->is(TT_JsFatArrow)) { - nextToken(); - // Fat arrows can be followed by simple expressions or by child blocks - // in curly braces. - if (FormatTok->is(tok::l_brace)) { - parseChildBlock(); - continue; - } - } - if (FormatTok->is(tok::l_brace)) { - // Could be a method inside of a braced list `{a() { return 1; }}`. - if (tryToParseBracedList()) - continue; - parseChildBlock(); - } - } - if (FormatTok->Tok.getKind() == ClosingBraceKind) { - nextToken(); - return !HasError; - } - switch (FormatTok->Tok.getKind()) { - case tok::caret: - nextToken(); - if (FormatTok->is(tok::l_brace)) { - parseChildBlock(); - } - break; - case tok::l_square: - tryToParseLambda(); - break; - case tok::l_paren: - parseParens(); - // JavaScript can just have free standing methods and getters/setters in - // object literals. Detect them by a "{" following ")". - if (Style.Language == FormatStyle::LK_JavaScript) { - if (FormatTok->is(tok::l_brace)) - parseChildBlock(); - break; - } - break; - case tok::l_brace: - // Assume there are no blocks inside a braced init list apart - // from the ones we explicitly parse out (like lambdas). - FormatTok->BlockKind = BK_BracedInit; - nextToken(); - parseBracedList(); - break; - case tok::less: - if (Style.Language == FormatStyle::LK_Proto) { - nextToken(); - parseBracedList(/*ContinueOnSemicolons=*/false, - /*ClosingBraceKind=*/tok::greater); - } else { - nextToken(); - } - break; - case tok::semi: - // JavaScript (or more precisely TypeScript) can have semicolons in braced - // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be - // used for error recovery if we have otherwise determined that this is - // a braced list. - if (Style.Language == FormatStyle::LK_JavaScript) { - nextToken(); - break; - } - HasError = true; - if (!ContinueOnSemicolons) - return !HasError; - nextToken(); - break; - case tok::comma: - nextToken(); - break; - default: - nextToken(); - break; - } - } while (!eof()); - return false; -} - -void UnwrappedLineParser::parseParens() { - assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); - nextToken(); - do { - switch (FormatTok->Tok.getKind()) { - case tok::l_paren: - parseParens(); - if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) - parseChildBlock(); - break; - case tok::r_paren: - nextToken(); - return; - case tok::r_brace: - // A "}" inside parenthesis is an error if there wasn't a matching "{". - return; - case tok::l_square: - tryToParseLambda(); - break; - case tok::l_brace: - if (!tryToParseBracedList()) - parseChildBlock(); - break; - case tok::at: - nextToken(); - if (FormatTok->Tok.is(tok::l_brace)) { - nextToken(); - parseBracedList(); - } - break; - case tok::kw_class: - if (Style.Language == FormatStyle::LK_JavaScript) - parseRecord(/*ParseAsExpr=*/true); - else - nextToken(); - break; - case tok::identifier: - if (Style.Language == FormatStyle::LK_JavaScript && - (FormatTok->is(Keywords.kw_function) || - FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function))) - tryToParseJSFunction(); - else - nextToken(); - break; - default: - nextToken(); - break; - } - } while (!eof()); -} - -void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) { - if (!LambdaIntroducer) { - assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); - if (tryToParseLambda()) - return; - } - do { - switch (FormatTok->Tok.getKind()) { - case tok::l_paren: - parseParens(); - break; - case tok::r_square: - nextToken(); - return; - case tok::r_brace: - // A "}" inside parenthesis is an error if there wasn't a matching "{". - return; - case tok::l_square: - parseSquare(); - break; - case tok::l_brace: { - if (!tryToParseBracedList()) - parseChildBlock(); - break; - } - case tok::at: - nextToken(); - if (FormatTok->Tok.is(tok::l_brace)) { - nextToken(); - parseBracedList(); - } - break; - default: - nextToken(); - break; - } - } while (!eof()); -} - -void UnwrappedLineParser::parseIfThenElse() { - assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); - nextToken(); - if (FormatTok->Tok.is(tok::kw_constexpr)) - nextToken(); - if (FormatTok->Tok.is(tok::l_paren)) - parseParens(); - bool NeedsUnwrappedLine = false; - if (FormatTok->Tok.is(tok::l_brace)) { - CompoundStatementIndenter Indenter(this, Style, Line->Level); - parseBlock(/*MustBeDeclaration=*/false); - if (Style.BraceWrapping.BeforeElse) - addUnwrappedLine(); - else - NeedsUnwrappedLine = true; - } else { - addUnwrappedLine(); - ++Line->Level; - parseStructuralElement(); - --Line->Level; - } - if (FormatTok->Tok.is(tok::kw_else)) { - nextToken(); - if (FormatTok->Tok.is(tok::l_brace)) { - CompoundStatementIndenter Indenter(this, Style, Line->Level); - parseBlock(/*MustBeDeclaration=*/false); - addUnwrappedLine(); - } else if (FormatTok->Tok.is(tok::kw_if)) { - parseIfThenElse(); - } else { - addUnwrappedLine(); - ++Line->Level; - parseStructuralElement(); - if (FormatTok->is(tok::eof)) - addUnwrappedLine(); - --Line->Level; - } - } else if (NeedsUnwrappedLine) { - addUnwrappedLine(); - } -} - -void UnwrappedLineParser::parseTryCatch() { - assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); - nextToken(); - bool NeedsUnwrappedLine = false; - if (FormatTok->is(tok::colon)) { - // We are in a function try block, what comes is an initializer list. - nextToken(); - while (FormatTok->is(tok::identifier)) { - nextToken(); - if (FormatTok->is(tok::l_paren)) - parseParens(); - if (FormatTok->is(tok::comma)) - nextToken(); - } - } - // Parse try with resource. - if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) { - parseParens(); - } - if (FormatTok->is(tok::l_brace)) { - CompoundStatementIndenter Indenter(this, Style, Line->Level); - parseBlock(/*MustBeDeclaration=*/false); - if (Style.BraceWrapping.BeforeCatch) { - addUnwrappedLine(); - } else { - NeedsUnwrappedLine = true; - } - } else if (!FormatTok->is(tok::kw_catch)) { - // The C++ standard requires a compound-statement after a try. - // If there's none, we try to assume there's a structuralElement - // and try to continue. - addUnwrappedLine(); - ++Line->Level; - parseStructuralElement(); - --Line->Level; - } - while (1) { - if (FormatTok->is(tok::at)) - nextToken(); - if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, - tok::kw___finally) || - ((Style.Language == FormatStyle::LK_Java || - Style.Language == FormatStyle::LK_JavaScript) && - FormatTok->is(Keywords.kw_finally)) || - (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) || - FormatTok->Tok.isObjCAtKeyword(tok::objc_finally)))) - break; - nextToken(); - while (FormatTok->isNot(tok::l_brace)) { - if (FormatTok->is(tok::l_paren)) { - parseParens(); - continue; - } - if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) - return; - nextToken(); - } - NeedsUnwrappedLine = false; - CompoundStatementIndenter Indenter(this, Style, Line->Level); - parseBlock(/*MustBeDeclaration=*/false); - if (Style.BraceWrapping.BeforeCatch) - addUnwrappedLine(); - else - NeedsUnwrappedLine = true; - } - if (NeedsUnwrappedLine) - addUnwrappedLine(); -} - -void UnwrappedLineParser::parseNamespace() { - assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected"); - - const FormatToken &InitialToken = *FormatTok; - nextToken(); - while (FormatTok->isOneOf(tok::identifier, tok::coloncolon)) - nextToken(); - if (FormatTok->Tok.is(tok::l_brace)) { - if (ShouldBreakBeforeBrace(Style, InitialToken)) - addUnwrappedLine(); - - bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || - (Style.NamespaceIndentation == FormatStyle::NI_Inner && - DeclarationScopeStack.size() > 1); - parseBlock(/*MustBeDeclaration=*/true, AddLevel); - // Munch the semicolon after a namespace. This is more common than one would - // think. Puttin the semicolon into its own line is very ugly. - if (FormatTok->Tok.is(tok::semi)) - nextToken(); - addUnwrappedLine(); - } - // FIXME: Add error handling. -} - -void UnwrappedLineParser::parseNew() { - assert(FormatTok->is(tok::kw_new) && "'new' expected"); - nextToken(); - if (Style.Language != FormatStyle::LK_Java) - return; - - // In Java, we can parse everything up to the parens, which aren't optional. - do { - // There should not be a ;, { or } before the new's open paren. - if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) - return; - - // Consume the parens. - if (FormatTok->is(tok::l_paren)) { - parseParens(); - - // If there is a class body of an anonymous class, consume that as child. - if (FormatTok->is(tok::l_brace)) - parseChildBlock(); - return; - } - nextToken(); - } while (!eof()); -} - -void UnwrappedLineParser::parseForOrWhileLoop() { - assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && - "'for', 'while' or foreach macro expected"); - nextToken(); - // JS' for await ( ... - if (Style.Language == FormatStyle::LK_JavaScript && - FormatTok->is(Keywords.kw_await)) - nextToken(); - if (FormatTok->Tok.is(tok::l_paren)) - parseParens(); - if (FormatTok->Tok.is(tok::l_brace)) { - CompoundStatementIndenter Indenter(this, Style, Line->Level); - parseBlock(/*MustBeDeclaration=*/false); - addUnwrappedLine(); - } else { - addUnwrappedLine(); - ++Line->Level; - parseStructuralElement(); - --Line->Level; - } -} - -void UnwrappedLineParser::parseDoWhile() { - assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); - nextToken(); - if (FormatTok->Tok.is(tok::l_brace)) { - CompoundStatementIndenter Indenter(this, Style, Line->Level); - parseBlock(/*MustBeDeclaration=*/false); - if (Style.BraceWrapping.IndentBraces) - addUnwrappedLine(); - } else { - addUnwrappedLine(); - ++Line->Level; - parseStructuralElement(); - --Line->Level; - } - - // FIXME: Add error handling. - if (!FormatTok->Tok.is(tok::kw_while)) { - addUnwrappedLine(); - return; - } - - nextToken(); - parseStructuralElement(); -} - -void UnwrappedLineParser::parseLabel() { - nextToken(); - unsigned OldLineLevel = Line->Level; - if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) - --Line->Level; - if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { - CompoundStatementIndenter Indenter(this, Style, Line->Level); - parseBlock(/*MustBeDeclaration=*/false); - if (FormatTok->Tok.is(tok::kw_break)) { - if (Style.BraceWrapping.AfterControlStatement) - addUnwrappedLine(); - parseStructuralElement(); - } - addUnwrappedLine(); - } else { - if (FormatTok->is(tok::semi)) - nextToken(); - addUnwrappedLine(); - } - Line->Level = OldLineLevel; - if (FormatTok->isNot(tok::l_brace)) { - parseStructuralElement(); - addUnwrappedLine(); - } -} - -void UnwrappedLineParser::parseCaseLabel() { - assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); - // FIXME: fix handling of complex expressions here. - do { - nextToken(); - } while (!eof() && !FormatTok->Tok.is(tok::colon)); - parseLabel(); -} - -void UnwrappedLineParser::parseSwitch() { - assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); - nextToken(); - if (FormatTok->Tok.is(tok::l_paren)) - parseParens(); - if (FormatTok->Tok.is(tok::l_brace)) { - CompoundStatementIndenter Indenter(this, Style, Line->Level); - parseBlock(/*MustBeDeclaration=*/false); - addUnwrappedLine(); - } else { - addUnwrappedLine(); - ++Line->Level; - parseStructuralElement(); - --Line->Level; - } -} - -void UnwrappedLineParser::parseAccessSpecifier() { - nextToken(); - // Understand Qt's slots. - if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) - nextToken(); - // Otherwise, we don't know what it is, and we'd better keep the next token. - if (FormatTok->Tok.is(tok::colon)) - nextToken(); - addUnwrappedLine(); -} - -bool UnwrappedLineParser::parseEnum() { - // Won't be 'enum' for NS_ENUMs. - if (FormatTok->Tok.is(tok::kw_enum)) - nextToken(); - - // In TypeScript, "enum" can also be used as property name, e.g. in interface - // declarations. An "enum" keyword followed by a colon would be a syntax - // error and thus assume it is just an identifier. - if (Style.Language == FormatStyle::LK_JavaScript && - FormatTok->isOneOf(tok::colon, tok::question)) - return false; - - // Eat up enum class ... - if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) - nextToken(); - - while (FormatTok->Tok.getIdentifierInfo() || - FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, - tok::greater, tok::comma, tok::question)) { - nextToken(); - // We can have macros or attributes in between 'enum' and the enum name. - if (FormatTok->is(tok::l_paren)) - parseParens(); - if (FormatTok->is(tok::identifier)) { - nextToken(); - // If there are two identifiers in a row, this is likely an elaborate - // return type. In Java, this can be "implements", etc. - if (Style.isCpp() && FormatTok->is(tok::identifier)) - return false; - } - } - - // Just a declaration or something is wrong. - if (FormatTok->isNot(tok::l_brace)) - return true; - FormatTok->BlockKind = BK_Block; - - if (Style.Language == FormatStyle::LK_Java) { - // Java enums are different. - parseJavaEnumBody(); - return true; - } - if (Style.Language == FormatStyle::LK_Proto) { - parseBlock(/*MustBeDeclaration=*/true); - return true; - } - - // Parse enum body. - nextToken(); - bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); - if (HasError) { - if (FormatTok->is(tok::semi)) - nextToken(); - addUnwrappedLine(); - } - return true; - - // There is no addUnwrappedLine() here so that we fall through to parsing a - // structural element afterwards. Thus, in "enum A {} n, m;", - // "} n, m;" will end up in one unwrapped line. -} - -void UnwrappedLineParser::parseJavaEnumBody() { - // Determine whether the enum is simple, i.e. does not have a semicolon or - // constants with class bodies. Simple enums can be formatted like braced - // lists, contracted to a single line, etc. - unsigned StoredPosition = Tokens->getPosition(); - bool IsSimple = true; - FormatToken *Tok = Tokens->getNextToken(); - while (Tok) { - if (Tok->is(tok::r_brace)) - break; - if (Tok->isOneOf(tok::l_brace, tok::semi)) { - IsSimple = false; - break; - } - // FIXME: This will also mark enums with braces in the arguments to enum - // constants as "not simple". This is probably fine in practice, though. - Tok = Tokens->getNextToken(); - } - FormatTok = Tokens->setPosition(StoredPosition); - - if (IsSimple) { - nextToken(); - parseBracedList(); - addUnwrappedLine(); - return; - } - - // Parse the body of a more complex enum. - // First add a line for everything up to the "{". - nextToken(); - addUnwrappedLine(); - ++Line->Level; - - // Parse the enum constants. - while (FormatTok) { - if (FormatTok->is(tok::l_brace)) { - // Parse the constant's class body. - parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, - /*MunchSemi=*/false); - } else if (FormatTok->is(tok::l_paren)) { - parseParens(); - } else if (FormatTok->is(tok::comma)) { - nextToken(); - addUnwrappedLine(); - } else if (FormatTok->is(tok::semi)) { - nextToken(); - addUnwrappedLine(); - break; - } else if (FormatTok->is(tok::r_brace)) { - addUnwrappedLine(); - break; - } else { - nextToken(); - } - } - - // Parse the class body after the enum's ";" if any. - parseLevel(/*HasOpeningBrace=*/true); - nextToken(); - --Line->Level; - addUnwrappedLine(); -} - -void UnwrappedLineParser::parseRecord(bool ParseAsExpr) { - const FormatToken &InitialToken = *FormatTok; - nextToken(); - - // The actual identifier can be a nested name specifier, and in macros - // it is often token-pasted. - while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, - tok::kw___attribute, tok::kw___declspec, - tok::kw_alignas) || - ((Style.Language == FormatStyle::LK_Java || - Style.Language == FormatStyle::LK_JavaScript) && - FormatTok->isOneOf(tok::period, tok::comma))) { - if (Style.Language == FormatStyle::LK_JavaScript && - FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) { - // JavaScript/TypeScript supports inline object types in - // extends/implements positions: - // class Foo implements {bar: number} { } - nextToken(); - if (FormatTok->is(tok::l_brace)) { - tryToParseBracedList(); - continue; - } - } - bool IsNonMacroIdentifier = - FormatTok->is(tok::identifier) && - FormatTok->TokenText != FormatTok->TokenText.upper(); - nextToken(); - // We can have macros or attributes in between 'class' and the class name. - if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren)) - parseParens(); - } - - // Note that parsing away template declarations here leads to incorrectly - // accepting function declarations as record declarations. - // In general, we cannot solve this problem. Consider: - // class A<int> B() {} - // which can be a function definition or a class definition when B() is a - // macro. If we find enough real-world cases where this is a problem, we - // can parse for the 'template' keyword in the beginning of the statement, - // and thus rule out the record production in case there is no template - // (this would still leave us with an ambiguity between template function - // and class declarations). - if (FormatTok->isOneOf(tok::colon, tok::less)) { - while (!eof()) { - if (FormatTok->is(tok::l_brace)) { - calculateBraceTypes(/*ExpectClassBody=*/true); - if (!tryToParseBracedList()) - break; - } - if (FormatTok->Tok.is(tok::semi)) - return; - nextToken(); - } - } - if (FormatTok->Tok.is(tok::l_brace)) { - if (ParseAsExpr) { - parseChildBlock(); - } else { - if (ShouldBreakBeforeBrace(Style, InitialToken)) - addUnwrappedLine(); - - parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, - /*MunchSemi=*/false); - } - } - // There is no addUnwrappedLine() here so that we fall through to parsing a - // structural element afterwards. Thus, in "class A {} n, m;", - // "} n, m;" will end up in one unwrapped line. -} - -void UnwrappedLineParser::parseObjCMethod() { - assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) && - "'(' or identifier expected."); - do { - if (FormatTok->Tok.is(tok::semi)) { - nextToken(); - addUnwrappedLine(); - return; - } else if (FormatTok->Tok.is(tok::l_brace)) { - if (Style.BraceWrapping.AfterFunction) - addUnwrappedLine(); - parseBlock(/*MustBeDeclaration=*/false); - addUnwrappedLine(); - return; - } else { - nextToken(); - } - } while (!eof()); -} - -void UnwrappedLineParser::parseObjCProtocolList() { - assert(FormatTok->Tok.is(tok::less) && "'<' expected."); - do { - nextToken(); - // Early exit in case someone forgot a close angle. - if (FormatTok->isOneOf(tok::semi, tok::l_brace) || - FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) - return; - } while (!eof() && FormatTok->Tok.isNot(tok::greater)); - nextToken(); // Skip '>'. -} - -void UnwrappedLineParser::parseObjCUntilAtEnd() { - do { - if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { - nextToken(); - addUnwrappedLine(); - break; - } - if (FormatTok->is(tok::l_brace)) { - parseBlock(/*MustBeDeclaration=*/false); - // In ObjC interfaces, nothing should be following the "}". - addUnwrappedLine(); - } else if (FormatTok->is(tok::r_brace)) { - // Ignore stray "}". parseStructuralElement doesn't consume them. - nextToken(); - addUnwrappedLine(); - } else if (FormatTok->isOneOf(tok::minus, tok::plus)) { - nextToken(); - parseObjCMethod(); - } else { - parseStructuralElement(); - } - } while (!eof()); -} - -void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { - assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface || - FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation); - nextToken(); - nextToken(); // interface name - - // @interface can be followed by a lightweight generic - // specialization list, then either a base class or a category. - if (FormatTok->Tok.is(tok::less)) { - // Unlike protocol lists, generic parameterizations support - // nested angles: - // - // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> : - // NSObject <NSCopying, NSSecureCoding> - // - // so we need to count how many open angles we have left. - unsigned NumOpenAngles = 1; - do { - nextToken(); - // Early exit in case someone forgot a close angle. - if (FormatTok->isOneOf(tok::semi, tok::l_brace) || - FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) - break; - if (FormatTok->Tok.is(tok::less)) - ++NumOpenAngles; - else if (FormatTok->Tok.is(tok::greater)) { - assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative"); - --NumOpenAngles; - } - } while (!eof() && NumOpenAngles != 0); - nextToken(); // Skip '>'. - } - if (FormatTok->Tok.is(tok::colon)) { - nextToken(); - nextToken(); // base class name - } else if (FormatTok->Tok.is(tok::l_paren)) - // Skip category, if present. - parseParens(); - - if (FormatTok->Tok.is(tok::less)) - parseObjCProtocolList(); - - if (FormatTok->Tok.is(tok::l_brace)) { - if (Style.BraceWrapping.AfterObjCDeclaration) - addUnwrappedLine(); - parseBlock(/*MustBeDeclaration=*/true); - } - - // With instance variables, this puts '}' on its own line. Without instance - // variables, this ends the @interface line. - addUnwrappedLine(); - - parseObjCUntilAtEnd(); -} - -// Returns true for the declaration/definition form of @protocol, -// false for the expression form. -bool UnwrappedLineParser::parseObjCProtocol() { - assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol); - nextToken(); - - if (FormatTok->is(tok::l_paren)) - // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);". - return false; - - // The definition/declaration form, - // @protocol Foo - // - (int)someMethod; - // @end - - nextToken(); // protocol name - - if (FormatTok->Tok.is(tok::less)) - parseObjCProtocolList(); - - // Check for protocol declaration. - if (FormatTok->Tok.is(tok::semi)) { - nextToken(); - addUnwrappedLine(); - return true; - } - - addUnwrappedLine(); - parseObjCUntilAtEnd(); - return true; -} - -void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { - bool IsImport = FormatTok->is(Keywords.kw_import); - assert(IsImport || FormatTok->is(tok::kw_export)); - nextToken(); - - // Consume the "default" in "export default class/function". - if (FormatTok->is(tok::kw_default)) - nextToken(); - - // Consume "async function", "function" and "default function", so that these - // get parsed as free-standing JS functions, i.e. do not require a trailing - // semicolon. - if (FormatTok->is(Keywords.kw_async)) - nextToken(); - if (FormatTok->is(Keywords.kw_function)) { - nextToken(); - return; - } - - // For imports, `export *`, `export {...}`, consume the rest of the line up - // to the terminating `;`. For everything else, just return and continue - // parsing the structural element, i.e. the declaration or expression for - // `export default`. - if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) && - !FormatTok->isStringLiteral()) - return; - - while (!eof()) { - if (FormatTok->is(tok::semi)) - return; - if (Line->Tokens.empty()) { - // Common issue: Automatic Semicolon Insertion wrapped the line, so the - // import statement should terminate. - return; - } - if (FormatTok->is(tok::l_brace)) { - FormatTok->BlockKind = BK_Block; - nextToken(); - parseBracedList(); - } else { - nextToken(); - } - } -} - -void UnwrappedLineParser::parseStatementMacro() -{ - nextToken(); - if (FormatTok->is(tok::l_paren)) - parseParens(); - if (FormatTok->is(tok::semi)) - nextToken(); - addUnwrappedLine(); -} - -LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, - StringRef Prefix = "") { - llvm::dbgs() << Prefix << "Line(" << Line.Level - << ", FSC=" << Line.FirstStartColumn << ")" - << (Line.InPPDirective ? " MACRO" : "") << ": "; - for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), - E = Line.Tokens.end(); - I != E; ++I) { - llvm::dbgs() << I->Tok->Tok.getName() << "[" - << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn - << "] "; - } - for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), - E = Line.Tokens.end(); - I != E; ++I) { - const UnwrappedLineNode &Node = *I; - for (SmallVectorImpl<UnwrappedLine>::const_iterator - I = Node.Children.begin(), - E = Node.Children.end(); - I != E; ++I) { - printDebugInfo(*I, "\nChild: "); - } - } - llvm::dbgs() << "\n"; -} - -void UnwrappedLineParser::addUnwrappedLine() { - if (Line->Tokens.empty()) - return; - LLVM_DEBUG({ - if (CurrentLines == &Lines) - printDebugInfo(*Line); - }); - CurrentLines->push_back(std::move(*Line)); - Line->Tokens.clear(); - Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; - Line->FirstStartColumn = 0; - if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { - CurrentLines->append( - std::make_move_iterator(PreprocessorDirectives.begin()), - std::make_move_iterator(PreprocessorDirectives.end())); - PreprocessorDirectives.clear(); - } - // Disconnect the current token from the last token on the previous line. - FormatTok->Previous = nullptr; -} - -bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } - -bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { - return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && - FormatTok.NewlinesBefore > 0; -} - -// Checks if \p FormatTok is a line comment that continues the line comment -// section on \p Line. -static bool continuesLineCommentSection(const FormatToken &FormatTok, - const UnwrappedLine &Line, - llvm::Regex &CommentPragmasRegex) { - if (Line.Tokens.empty()) - return false; - - StringRef IndentContent = FormatTok.TokenText; - if (FormatTok.TokenText.startswith("//") || - FormatTok.TokenText.startswith("/*")) - IndentContent = FormatTok.TokenText.substr(2); - if (CommentPragmasRegex.match(IndentContent)) - return false; - - // If Line starts with a line comment, then FormatTok continues the comment - // section if its original column is greater or equal to the original start - // column of the line. - // - // Define the min column token of a line as follows: if a line ends in '{' or - // contains a '{' followed by a line comment, then the min column token is - // that '{'. Otherwise, the min column token of the line is the first token of - // the line. - // - // If Line starts with a token other than a line comment, then FormatTok - // continues the comment section if its original column is greater than the - // original start column of the min column token of the line. - // - // For example, the second line comment continues the first in these cases: - // - // // first line - // // second line - // - // and: - // - // // first line - // // second line - // - // and: - // - // int i; // first line - // // second line - // - // and: - // - // do { // first line - // // second line - // int i; - // } while (true); - // - // and: - // - // enum { - // a, // first line - // // second line - // b - // }; - // - // The second line comment doesn't continue the first in these cases: - // - // // first line - // // second line - // - // and: - // - // int i; // first line - // // second line - // - // and: - // - // do { // first line - // // second line - // int i; - // } while (true); - // - // and: - // - // enum { - // a, // first line - // // second line - // }; - const FormatToken *MinColumnToken = Line.Tokens.front().Tok; - - // Scan for '{//'. If found, use the column of '{' as a min column for line - // comment section continuation. - const FormatToken *PreviousToken = nullptr; - for (const UnwrappedLineNode &Node : Line.Tokens) { - if (PreviousToken && PreviousToken->is(tok::l_brace) && - isLineComment(*Node.Tok)) { - MinColumnToken = PreviousToken; - break; - } - PreviousToken = Node.Tok; - - // Grab the last newline preceding a token in this unwrapped line. - if (Node.Tok->NewlinesBefore > 0) { - MinColumnToken = Node.Tok; - } - } - if (PreviousToken && PreviousToken->is(tok::l_brace)) { - MinColumnToken = PreviousToken; - } - - return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok, - MinColumnToken); -} - -void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { - bool JustComments = Line->Tokens.empty(); - for (SmallVectorImpl<FormatToken *>::const_iterator - I = CommentsBeforeNextToken.begin(), - E = CommentsBeforeNextToken.end(); - I != E; ++I) { - // Line comments that belong to the same line comment section are put on the - // same line since later we might want to reflow content between them. - // Additional fine-grained breaking of line comment sections is controlled - // by the class BreakableLineCommentSection in case it is desirable to keep - // several line comment sections in the same unwrapped line. - // - // FIXME: Consider putting separate line comment sections as children to the - // unwrapped line instead. - (*I)->ContinuesLineCommentSection = - continuesLineCommentSection(**I, *Line, CommentPragmasRegex); - if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection) - addUnwrappedLine(); - pushToken(*I); - } - if (NewlineBeforeNext && JustComments) - addUnwrappedLine(); - CommentsBeforeNextToken.clear(); -} - -void UnwrappedLineParser::nextToken(int LevelDifference) { - if (eof()) - return; - flushComments(isOnNewLine(*FormatTok)); - pushToken(FormatTok); - FormatToken *Previous = FormatTok; - if (Style.Language != FormatStyle::LK_JavaScript) - readToken(LevelDifference); - else - readTokenWithJavaScriptASI(); - FormatTok->Previous = Previous; -} - -void UnwrappedLineParser::distributeComments( - const SmallVectorImpl<FormatToken *> &Comments, - const FormatToken *NextTok) { - // Whether or not a line comment token continues a line is controlled by - // the method continuesLineCommentSection, with the following caveat: - // - // Define a trail of Comments to be a nonempty proper postfix of Comments such - // that each comment line from the trail is aligned with the next token, if - // the next token exists. If a trail exists, the beginning of the maximal - // trail is marked as a start of a new comment section. - // - // For example in this code: - // - // int a; // line about a - // // line 1 about b - // // line 2 about b - // int b; - // - // the two lines about b form a maximal trail, so there are two sections, the - // first one consisting of the single comment "// line about a" and the - // second one consisting of the next two comments. - if (Comments.empty()) - return; - bool ShouldPushCommentsInCurrentLine = true; - bool HasTrailAlignedWithNextToken = false; - unsigned StartOfTrailAlignedWithNextToken = 0; - if (NextTok) { - // We are skipping the first element intentionally. - for (unsigned i = Comments.size() - 1; i > 0; --i) { - if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) { - HasTrailAlignedWithNextToken = true; - StartOfTrailAlignedWithNextToken = i; - } - } - } - for (unsigned i = 0, e = Comments.size(); i < e; ++i) { - FormatToken *FormatTok = Comments[i]; - if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) { - FormatTok->ContinuesLineCommentSection = false; - } else { - FormatTok->ContinuesLineCommentSection = - continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex); - } - if (!FormatTok->ContinuesLineCommentSection && - (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) { - ShouldPushCommentsInCurrentLine = false; - } - if (ShouldPushCommentsInCurrentLine) { - pushToken(FormatTok); - } else { - CommentsBeforeNextToken.push_back(FormatTok); - } - } -} - -void UnwrappedLineParser::readToken(int LevelDifference) { - SmallVector<FormatToken *, 1> Comments; - do { - FormatTok = Tokens->getNextToken(); - assert(FormatTok); - while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && - (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { - distributeComments(Comments, FormatTok); - Comments.clear(); - // If there is an unfinished unwrapped line, we flush the preprocessor - // directives only after that unwrapped line was finished later. - bool SwitchToPreprocessorLines = !Line->Tokens.empty(); - ScopedLineState BlockState(*this, SwitchToPreprocessorLines); - assert((LevelDifference >= 0 || - static_cast<unsigned>(-LevelDifference) <= Line->Level) && - "LevelDifference makes Line->Level negative"); - Line->Level += LevelDifference; - // Comments stored before the preprocessor directive need to be output - // before the preprocessor directive, at the same level as the - // preprocessor directive, as we consider them to apply to the directive. - flushComments(isOnNewLine(*FormatTok)); - parsePPDirective(); - } - while (FormatTok->Type == TT_ConflictStart || - FormatTok->Type == TT_ConflictEnd || - FormatTok->Type == TT_ConflictAlternative) { - if (FormatTok->Type == TT_ConflictStart) { - conditionalCompilationStart(/*Unreachable=*/false); - } else if (FormatTok->Type == TT_ConflictAlternative) { - conditionalCompilationAlternative(); - } else if (FormatTok->Type == TT_ConflictEnd) { - conditionalCompilationEnd(); - } - FormatTok = Tokens->getNextToken(); - FormatTok->MustBreakBefore = true; - } - - if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) && - !Line->InPPDirective) { - continue; - } - - if (!FormatTok->Tok.is(tok::comment)) { - distributeComments(Comments, FormatTok); - Comments.clear(); - return; - } - - Comments.push_back(FormatTok); - } while (!eof()); - - distributeComments(Comments, nullptr); - Comments.clear(); -} - -void UnwrappedLineParser::pushToken(FormatToken *Tok) { - Line->Tokens.push_back(UnwrappedLineNode(Tok)); - if (MustBreakBeforeNextToken) { - Line->Tokens.back().Tok->MustBreakBefore = true; - MustBreakBeforeNextToken = false; - } -} - -} // end namespace format -} // end namespace clang diff --git a/gnu/llvm/tools/clang/lib/Format/UnwrappedLineParser.h b/gnu/llvm/tools/clang/lib/Format/UnwrappedLineParser.h deleted file mode 100644 index 55d60dff915..00000000000 --- a/gnu/llvm/tools/clang/lib/Format/UnwrappedLineParser.h +++ /dev/null @@ -1,298 +0,0 @@ -//===--- UnwrappedLineParser.h - Format C++ code ----------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file contains the declaration of the UnwrappedLineParser, -/// which turns a stream of tokens into UnwrappedLines. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H -#define LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H - -#include "FormatToken.h" -#include "clang/Basic/IdentifierTable.h" -#include "clang/Format/Format.h" -#include "llvm/Support/Regex.h" -#include <list> -#include <stack> - -namespace clang { -namespace format { - -struct UnwrappedLineNode; - -/// An unwrapped line is a sequence of \c Token, that we would like to -/// put on a single line if there was no column limit. -/// -/// This is used as a main interface between the \c UnwrappedLineParser and the -/// \c UnwrappedLineFormatter. The key property is that changing the formatting -/// within an unwrapped line does not affect any other unwrapped lines. -struct UnwrappedLine { - UnwrappedLine(); - - // FIXME: Don't use std::list here. - /// The \c Tokens comprising this \c UnwrappedLine. - std::list<UnwrappedLineNode> Tokens; - - /// The indent level of the \c UnwrappedLine. - unsigned Level; - - /// Whether this \c UnwrappedLine is part of a preprocessor directive. - bool InPPDirective; - - bool MustBeDeclaration; - - /// If this \c UnwrappedLine closes a block in a sequence of lines, - /// \c MatchingOpeningBlockLineIndex stores the index of the corresponding - /// opening line. Otherwise, \c MatchingOpeningBlockLineIndex must be - /// \c kInvalidIndex. - size_t MatchingOpeningBlockLineIndex = kInvalidIndex; - - /// If this \c UnwrappedLine opens a block, stores the index of the - /// line with the corresponding closing brace. - size_t MatchingClosingBlockLineIndex = kInvalidIndex; - - static const size_t kInvalidIndex = -1; - - unsigned FirstStartColumn = 0; -}; - -class UnwrappedLineConsumer { -public: - virtual ~UnwrappedLineConsumer() {} - virtual void consumeUnwrappedLine(const UnwrappedLine &Line) = 0; - virtual void finishRun() = 0; -}; - -class FormatTokenSource; - -class UnwrappedLineParser { -public: - UnwrappedLineParser(const FormatStyle &Style, - const AdditionalKeywords &Keywords, - unsigned FirstStartColumn, - ArrayRef<FormatToken *> Tokens, - UnwrappedLineConsumer &Callback); - - void parse(); - -private: - void reset(); - void parseFile(); - void parseLevel(bool HasOpeningBrace); - void parseBlock(bool MustBeDeclaration, bool AddLevel = true, - bool MunchSemi = true); - void parseChildBlock(); - void parsePPDirective(); - void parsePPDefine(); - void parsePPIf(bool IfDef); - void parsePPElIf(); - void parsePPElse(); - void parsePPEndIf(); - void parsePPUnknown(); - void readTokenWithJavaScriptASI(); - void parseStructuralElement(); - bool tryToParseBracedList(); - bool parseBracedList(bool ContinueOnSemicolons = false, - tok::TokenKind ClosingBraceKind = tok::r_brace); - void parseParens(); - void parseSquare(bool LambdaIntroducer = false); - void parseIfThenElse(); - void parseTryCatch(); - void parseForOrWhileLoop(); - void parseDoWhile(); - void parseLabel(); - void parseCaseLabel(); - void parseSwitch(); - void parseNamespace(); - void parseNew(); - void parseAccessSpecifier(); - bool parseEnum(); - void parseJavaEnumBody(); - // Parses a record (aka class) as a top level element. If ParseAsExpr is true, - // parses the record as a child block, i.e. if the class declaration is an - // expression. - void parseRecord(bool ParseAsExpr = false); - void parseObjCMethod(); - void parseObjCProtocolList(); - void parseObjCUntilAtEnd(); - void parseObjCInterfaceOrImplementation(); - bool parseObjCProtocol(); - void parseJavaScriptEs6ImportExport(); - void parseStatementMacro(); - bool tryToParseLambda(); - bool tryToParseLambdaIntroducer(); - void tryToParseJSFunction(); - void addUnwrappedLine(); - bool eof() const; - // LevelDifference is the difference of levels after and before the current - // token. For example: - // - if the token is '{' and opens a block, LevelDifference is 1. - // - if the token is '}' and closes a block, LevelDifference is -1. - void nextToken(int LevelDifference = 0); - void readToken(int LevelDifference = 0); - - // Decides which comment tokens should be added to the current line and which - // should be added as comments before the next token. - // - // Comments specifies the sequence of comment tokens to analyze. They get - // either pushed to the current line or added to the comments before the next - // token. - // - // NextTok specifies the next token. A null pointer NextTok is supported, and - // signifies either the absence of a next token, or that the next token - // shouldn't be taken into accunt for the analysis. - void distributeComments(const SmallVectorImpl<FormatToken *> &Comments, - const FormatToken *NextTok); - - // Adds the comment preceding the next token to unwrapped lines. - void flushComments(bool NewlineBeforeNext); - void pushToken(FormatToken *Tok); - void calculateBraceTypes(bool ExpectClassBody = false); - - // Marks a conditional compilation edge (for example, an '#if', '#ifdef', - // '#else' or merge conflict marker). If 'Unreachable' is true, assumes - // this branch either cannot be taken (for example '#if false'), or should - // not be taken in this round. - void conditionalCompilationCondition(bool Unreachable); - void conditionalCompilationStart(bool Unreachable); - void conditionalCompilationAlternative(); - void conditionalCompilationEnd(); - - bool isOnNewLine(const FormatToken &FormatTok); - - // Compute hash of the current preprocessor branch. - // This is used to identify the different branches, and thus track if block - // open and close in the same branch. - size_t computePPHash() const; - - // FIXME: We are constantly running into bugs where Line.Level is incorrectly - // subtracted from beyond 0. Introduce a method to subtract from Line.Level - // and use that everywhere in the Parser. - std::unique_ptr<UnwrappedLine> Line; - - // Comments are sorted into unwrapped lines by whether they are in the same - // line as the previous token, or not. If not, they belong to the next token. - // Since the next token might already be in a new unwrapped line, we need to - // store the comments belonging to that token. - SmallVector<FormatToken *, 1> CommentsBeforeNextToken; - FormatToken *FormatTok; - bool MustBreakBeforeNextToken; - - // The parsed lines. Only added to through \c CurrentLines. - SmallVector<UnwrappedLine, 8> Lines; - - // Preprocessor directives are parsed out-of-order from other unwrapped lines. - // Thus, we need to keep a list of preprocessor directives to be reported - // after an unwrapped line that has been started was finished. - SmallVector<UnwrappedLine, 4> PreprocessorDirectives; - - // New unwrapped lines are added via CurrentLines. - // Usually points to \c &Lines. While parsing a preprocessor directive when - // there is an unfinished previous unwrapped line, will point to - // \c &PreprocessorDirectives. - SmallVectorImpl<UnwrappedLine> *CurrentLines; - - // We store for each line whether it must be a declaration depending on - // whether we are in a compound statement or not. - std::vector<bool> DeclarationScopeStack; - - const FormatStyle &Style; - const AdditionalKeywords &Keywords; - - llvm::Regex CommentPragmasRegex; - - FormatTokenSource *Tokens; - UnwrappedLineConsumer &Callback; - - // FIXME: This is a temporary measure until we have reworked the ownership - // of the format tokens. The goal is to have the actual tokens created and - // owned outside of and handed into the UnwrappedLineParser. - ArrayRef<FormatToken *> AllTokens; - - // Represents preprocessor branch type, so we can find matching - // #if/#else/#endif directives. - enum PPBranchKind { - PP_Conditional, // Any #if, #ifdef, #ifndef, #elif, block outside #if 0 - PP_Unreachable // #if 0 or a conditional preprocessor block inside #if 0 - }; - - struct PPBranch { - PPBranch(PPBranchKind Kind, size_t Line) : Kind(Kind), Line(Line) {} - PPBranchKind Kind; - size_t Line; - }; - - // Keeps a stack of currently active preprocessor branching directives. - SmallVector<PPBranch, 16> PPStack; - - // The \c UnwrappedLineParser re-parses the code for each combination - // of preprocessor branches that can be taken. - // To that end, we take the same branch (#if, #else, or one of the #elif - // branches) for each nesting level of preprocessor branches. - // \c PPBranchLevel stores the current nesting level of preprocessor - // branches during one pass over the code. - int PPBranchLevel; - - // Contains the current branch (#if, #else or one of the #elif branches) - // for each nesting level. - SmallVector<int, 8> PPLevelBranchIndex; - - // Contains the maximum number of branches at each nesting level. - SmallVector<int, 8> PPLevelBranchCount; - - // Contains the number of branches per nesting level we are currently - // in while parsing a preprocessor branch sequence. - // This is used to update PPLevelBranchCount at the end of a branch - // sequence. - std::stack<int> PPChainBranchIndex; - - // Include guard search state. Used to fixup preprocessor indent levels - // so that include guards do not participate in indentation. - enum IncludeGuardState { - IG_Inited, // Search started, looking for #ifndef. - IG_IfNdefed, // #ifndef found, IncludeGuardToken points to condition. - IG_Defined, // Matching #define found, checking other requirements. - IG_Found, // All requirements met, need to fix indents. - IG_Rejected, // Search failed or never started. - }; - - // Current state of include guard search. - IncludeGuardState IncludeGuard; - - // Points to the #ifndef condition for a potential include guard. Null unless - // IncludeGuardState == IG_IfNdefed. - FormatToken *IncludeGuardToken; - - // Contains the first start column where the source begins. This is zero for - // normal source code and may be nonzero when formatting a code fragment that - // does not start at the beginning of the file. - unsigned FirstStartColumn; - - friend class ScopedLineState; - friend class CompoundStatementIndenter; -}; - -struct UnwrappedLineNode { - UnwrappedLineNode() : Tok(nullptr) {} - UnwrappedLineNode(FormatToken *Tok) : Tok(Tok) {} - - FormatToken *Tok; - SmallVector<UnwrappedLine, 0> Children; -}; - -inline UnwrappedLine::UnwrappedLine() - : Level(0), InPPDirective(false), MustBeDeclaration(false), - MatchingOpeningBlockLineIndex(kInvalidIndex) {} - -} // end namespace format -} // end namespace clang - -#endif diff --git a/gnu/llvm/tools/clang/lib/Format/UsingDeclarationsSorter.cpp b/gnu/llvm/tools/clang/lib/Format/UsingDeclarationsSorter.cpp deleted file mode 100644 index 9e49e791303..00000000000 --- a/gnu/llvm/tools/clang/lib/Format/UsingDeclarationsSorter.cpp +++ /dev/null @@ -1,216 +0,0 @@ -//===--- UsingDeclarationsSorter.cpp ----------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file implements UsingDeclarationsSorter, a TokenAnalyzer that -/// sorts consecutive using declarations. -/// -//===----------------------------------------------------------------------===// - -#include "UsingDeclarationsSorter.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Regex.h" - -#include <algorithm> - -#define DEBUG_TYPE "using-declarations-sorter" - -namespace clang { -namespace format { - -namespace { - -// The order of using declaration is defined as follows: -// Split the strings by "::" and discard any initial empty strings. The last -// element of each list is a non-namespace name; all others are namespace -// names. Sort the lists of names lexicographically, where the sort order of -// individual names is that all non-namespace names come before all namespace -// names, and within those groups, names are in case-insensitive lexicographic -// order. -int compareLabels(StringRef A, StringRef B) { - SmallVector<StringRef, 2> NamesA; - A.split(NamesA, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false); - SmallVector<StringRef, 2> NamesB; - B.split(NamesB, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false); - size_t SizeA = NamesA.size(); - size_t SizeB = NamesB.size(); - for (size_t I = 0, E = std::min(SizeA, SizeB); I < E; ++I) { - if (I + 1 == SizeA) { - // I is the last index of NamesA and NamesA[I] is a non-namespace name. - - // Non-namespace names come before all namespace names. - if (SizeB > SizeA) - return -1; - - // Two names within a group compare case-insensitively. - return NamesA[I].compare_lower(NamesB[I]); - } - - // I is the last index of NamesB and NamesB[I] is a non-namespace name. - // Non-namespace names come before all namespace names. - if (I + 1 == SizeB) - return 1; - - // Two namespaces names within a group compare case-insensitively. - int C = NamesA[I].compare_lower(NamesB[I]); - if (C != 0) - return C; - } - return 0; -} - -struct UsingDeclaration { - const AnnotatedLine *Line; - std::string Label; - - UsingDeclaration(const AnnotatedLine *Line, const std::string &Label) - : Line(Line), Label(Label) {} - - bool operator<(const UsingDeclaration &Other) const { - return compareLabels(Label, Other.Label) < 0; - } -}; - -/// Computes the label of a using declaration starting at tthe using token -/// \p UsingTok. -/// If \p UsingTok doesn't begin a using declaration, returns the empty string. -/// Note that this detects specifically using declarations, as in: -/// using A::B::C; -/// and not type aliases, as in: -/// using A = B::C; -/// Type aliases are in general not safe to permute. -std::string computeUsingDeclarationLabel(const FormatToken *UsingTok) { - assert(UsingTok && UsingTok->is(tok::kw_using) && "Expecting a using token"); - std::string Label; - const FormatToken *Tok = UsingTok->Next; - if (Tok && Tok->is(tok::kw_typename)) { - Label.append("typename "); - Tok = Tok->Next; - } - if (Tok && Tok->is(tok::coloncolon)) { - Label.append("::"); - Tok = Tok->Next; - } - bool HasIdentifier = false; - while (Tok && Tok->is(tok::identifier)) { - HasIdentifier = true; - Label.append(Tok->TokenText.str()); - Tok = Tok->Next; - if (!Tok || Tok->isNot(tok::coloncolon)) - break; - Label.append("::"); - Tok = Tok->Next; - } - if (HasIdentifier && Tok && Tok->isOneOf(tok::semi, tok::comma)) - return Label; - return ""; -} - -void endUsingDeclarationBlock( - SmallVectorImpl<UsingDeclaration> *UsingDeclarations, - const SourceManager &SourceMgr, tooling::Replacements *Fixes) { - bool BlockAffected = false; - for (const UsingDeclaration &Declaration : *UsingDeclarations) { - if (Declaration.Line->Affected) { - BlockAffected = true; - break; - } - } - if (!BlockAffected) { - UsingDeclarations->clear(); - return; - } - SmallVector<UsingDeclaration, 4> SortedUsingDeclarations( - UsingDeclarations->begin(), UsingDeclarations->end()); - std::stable_sort(SortedUsingDeclarations.begin(), - SortedUsingDeclarations.end()); - SortedUsingDeclarations.erase( - std::unique(SortedUsingDeclarations.begin(), - SortedUsingDeclarations.end(), - [](const UsingDeclaration &a, const UsingDeclaration &b) { - return a.Label == b.Label; - }), - SortedUsingDeclarations.end()); - for (size_t I = 0, E = UsingDeclarations->size(); I < E; ++I) { - if (I >= SortedUsingDeclarations.size()) { - // This using declaration has been deduplicated, delete it. - auto Begin = - (*UsingDeclarations)[I].Line->First->WhitespaceRange.getBegin(); - auto End = (*UsingDeclarations)[I].Line->Last->Tok.getEndLoc(); - auto Range = CharSourceRange::getCharRange(Begin, End); - auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, "")); - if (Err) { - llvm::errs() << "Error while sorting using declarations: " - << llvm::toString(std::move(Err)) << "\n"; - } - continue; - } - if ((*UsingDeclarations)[I].Line == SortedUsingDeclarations[I].Line) - continue; - auto Begin = (*UsingDeclarations)[I].Line->First->Tok.getLocation(); - auto End = (*UsingDeclarations)[I].Line->Last->Tok.getEndLoc(); - auto SortedBegin = - SortedUsingDeclarations[I].Line->First->Tok.getLocation(); - auto SortedEnd = SortedUsingDeclarations[I].Line->Last->Tok.getEndLoc(); - StringRef Text(SourceMgr.getCharacterData(SortedBegin), - SourceMgr.getCharacterData(SortedEnd) - - SourceMgr.getCharacterData(SortedBegin)); - LLVM_DEBUG({ - StringRef OldText(SourceMgr.getCharacterData(Begin), - SourceMgr.getCharacterData(End) - - SourceMgr.getCharacterData(Begin)); - llvm::dbgs() << "Replacing '" << OldText << "' with '" << Text << "'\n"; - }); - auto Range = CharSourceRange::getCharRange(Begin, End); - auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, Text)); - if (Err) { - llvm::errs() << "Error while sorting using declarations: " - << llvm::toString(std::move(Err)) << "\n"; - } - } - UsingDeclarations->clear(); -} - -} // namespace - -UsingDeclarationsSorter::UsingDeclarationsSorter(const Environment &Env, - const FormatStyle &Style) - : TokenAnalyzer(Env, Style) {} - -std::pair<tooling::Replacements, unsigned> UsingDeclarationsSorter::analyze( - TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, - FormatTokenLexer &Tokens) { - const SourceManager &SourceMgr = Env.getSourceManager(); - AffectedRangeMgr.computeAffectedLines(AnnotatedLines); - tooling::Replacements Fixes; - SmallVector<UsingDeclaration, 4> UsingDeclarations; - for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) { - const auto *FirstTok = AnnotatedLines[I]->First; - if (AnnotatedLines[I]->InPPDirective || - !AnnotatedLines[I]->startsWith(tok::kw_using) || FirstTok->Finalized) { - endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes); - continue; - } - if (FirstTok->NewlinesBefore > 1) - endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes); - const auto *UsingTok = - FirstTok->is(tok::comment) ? FirstTok->getNextNonComment() : FirstTok; - std::string Label = computeUsingDeclarationLabel(UsingTok); - if (Label.empty()) { - endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes); - continue; - } - UsingDeclarations.push_back(UsingDeclaration(AnnotatedLines[I], Label)); - } - endUsingDeclarationBlock(&UsingDeclarations, SourceMgr, &Fixes); - return {Fixes, 0}; -} - -} // namespace format -} // namespace clang diff --git a/gnu/llvm/tools/clang/lib/Format/UsingDeclarationsSorter.h b/gnu/llvm/tools/clang/lib/Format/UsingDeclarationsSorter.h deleted file mode 100644 index 7e5cf7610d6..00000000000 --- a/gnu/llvm/tools/clang/lib/Format/UsingDeclarationsSorter.h +++ /dev/null @@ -1,37 +0,0 @@ -//===--- UsingDeclarationsSorter.h ------------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file declares UsingDeclarationsSorter, a TokenAnalyzer that -/// sorts consecutive using declarations. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LIB_FORMAT_USINGDECLARATIONSSORTER_H -#define LLVM_CLANG_LIB_FORMAT_USINGDECLARATIONSSORTER_H - -#include "TokenAnalyzer.h" - -namespace clang { -namespace format { - -class UsingDeclarationsSorter : public TokenAnalyzer { -public: - UsingDeclarationsSorter(const Environment &Env, const FormatStyle &Style); - - std::pair<tooling::Replacements, unsigned> - analyze(TokenAnnotator &Annotator, - SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, - FormatTokenLexer &Tokens) override; -}; - -} // end namespace format -} // end namespace clang - -#endif diff --git a/gnu/llvm/tools/clang/lib/Format/WhitespaceManager.cpp b/gnu/llvm/tools/clang/lib/Format/WhitespaceManager.cpp deleted file mode 100644 index 032b1333322..00000000000 --- a/gnu/llvm/tools/clang/lib/Format/WhitespaceManager.cpp +++ /dev/null @@ -1,717 +0,0 @@ -//===--- WhitespaceManager.cpp - Format C++ code --------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file implements WhitespaceManager class. -/// -//===----------------------------------------------------------------------===// - -#include "WhitespaceManager.h" -#include "llvm/ADT/STLExtras.h" - -namespace clang { -namespace format { - -bool WhitespaceManager::Change::IsBeforeInFile:: -operator()(const Change &C1, const Change &C2) const { - return SourceMgr.isBeforeInTranslationUnit( - C1.OriginalWhitespaceRange.getBegin(), - C2.OriginalWhitespaceRange.getBegin()); -} - -WhitespaceManager::Change::Change(const FormatToken &Tok, - bool CreateReplacement, - SourceRange OriginalWhitespaceRange, - int Spaces, unsigned StartOfTokenColumn, - unsigned NewlinesBefore, - StringRef PreviousLinePostfix, - StringRef CurrentLinePrefix, - bool ContinuesPPDirective, bool IsInsideToken) - : Tok(&Tok), CreateReplacement(CreateReplacement), - OriginalWhitespaceRange(OriginalWhitespaceRange), - StartOfTokenColumn(StartOfTokenColumn), NewlinesBefore(NewlinesBefore), - PreviousLinePostfix(PreviousLinePostfix), - CurrentLinePrefix(CurrentLinePrefix), - ContinuesPPDirective(ContinuesPPDirective), Spaces(Spaces), - IsInsideToken(IsInsideToken), IsTrailingComment(false), TokenLength(0), - PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0), - StartOfBlockComment(nullptr), IndentationOffset(0) {} - -void WhitespaceManager::replaceWhitespace(FormatToken &Tok, unsigned Newlines, - unsigned Spaces, - unsigned StartOfTokenColumn, - bool InPPDirective) { - if (Tok.Finalized) - return; - Tok.Decision = (Newlines > 0) ? FD_Break : FD_Continue; - Changes.push_back(Change(Tok, /*CreateReplacement=*/true, Tok.WhitespaceRange, - Spaces, StartOfTokenColumn, Newlines, "", "", - InPPDirective && !Tok.IsFirst, - /*IsInsideToken=*/false)); -} - -void WhitespaceManager::addUntouchableToken(const FormatToken &Tok, - bool InPPDirective) { - if (Tok.Finalized) - return; - Changes.push_back(Change(Tok, /*CreateReplacement=*/false, - Tok.WhitespaceRange, /*Spaces=*/0, - Tok.OriginalColumn, Tok.NewlinesBefore, "", "", - InPPDirective && !Tok.IsFirst, - /*IsInsideToken=*/false)); -} - -llvm::Error -WhitespaceManager::addReplacement(const tooling::Replacement &Replacement) { - return Replaces.add(Replacement); -} - -void WhitespaceManager::replaceWhitespaceInToken( - const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars, - StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective, - unsigned Newlines, int Spaces) { - if (Tok.Finalized) - return; - SourceLocation Start = Tok.getStartOfNonWhitespace().getLocWithOffset(Offset); - Changes.push_back( - Change(Tok, /*CreateReplacement=*/true, - SourceRange(Start, Start.getLocWithOffset(ReplaceChars)), Spaces, - std::max(0, Spaces), Newlines, PreviousPostfix, CurrentPrefix, - InPPDirective && !Tok.IsFirst, /*IsInsideToken=*/true)); -} - -const tooling::Replacements &WhitespaceManager::generateReplacements() { - if (Changes.empty()) - return Replaces; - - llvm::sort(Changes, Change::IsBeforeInFile(SourceMgr)); - calculateLineBreakInformation(); - alignConsecutiveDeclarations(); - alignConsecutiveAssignments(); - alignTrailingComments(); - alignEscapedNewlines(); - generateChanges(); - - return Replaces; -} - -void WhitespaceManager::calculateLineBreakInformation() { - Changes[0].PreviousEndOfTokenColumn = 0; - Change *LastOutsideTokenChange = &Changes[0]; - for (unsigned i = 1, e = Changes.size(); i != e; ++i) { - SourceLocation OriginalWhitespaceStart = - Changes[i].OriginalWhitespaceRange.getBegin(); - SourceLocation PreviousOriginalWhitespaceEnd = - Changes[i - 1].OriginalWhitespaceRange.getEnd(); - unsigned OriginalWhitespaceStartOffset = - SourceMgr.getFileOffset(OriginalWhitespaceStart); - unsigned PreviousOriginalWhitespaceEndOffset = - SourceMgr.getFileOffset(PreviousOriginalWhitespaceEnd); - assert(PreviousOriginalWhitespaceEndOffset <= - OriginalWhitespaceStartOffset); - const char *const PreviousOriginalWhitespaceEndData = - SourceMgr.getCharacterData(PreviousOriginalWhitespaceEnd); - StringRef Text(PreviousOriginalWhitespaceEndData, - SourceMgr.getCharacterData(OriginalWhitespaceStart) - - PreviousOriginalWhitespaceEndData); - // Usually consecutive changes would occur in consecutive tokens. This is - // not the case however when analyzing some preprocessor runs of the - // annotated lines. For example, in this code: - // - // #if A // line 1 - // int i = 1; - // #else B // line 2 - // int i = 2; - // #endif // line 3 - // - // one of the runs will produce the sequence of lines marked with line 1, 2 - // and 3. So the two consecutive whitespace changes just before '// line 2' - // and before '#endif // line 3' span multiple lines and tokens: - // - // #else B{change X}[// line 2 - // int i = 2; - // ]{change Y}#endif // line 3 - // - // For this reason, if the text between consecutive changes spans multiple - // newlines, the token length must be adjusted to the end of the original - // line of the token. - auto NewlinePos = Text.find_first_of('\n'); - if (NewlinePos == StringRef::npos) { - Changes[i - 1].TokenLength = OriginalWhitespaceStartOffset - - PreviousOriginalWhitespaceEndOffset + - Changes[i].PreviousLinePostfix.size() + - Changes[i - 1].CurrentLinePrefix.size(); - } else { - Changes[i - 1].TokenLength = - NewlinePos + Changes[i - 1].CurrentLinePrefix.size(); - } - - // If there are multiple changes in this token, sum up all the changes until - // the end of the line. - if (Changes[i - 1].IsInsideToken && Changes[i - 1].NewlinesBefore == 0) - LastOutsideTokenChange->TokenLength += - Changes[i - 1].TokenLength + Changes[i - 1].Spaces; - else - LastOutsideTokenChange = &Changes[i - 1]; - - Changes[i].PreviousEndOfTokenColumn = - Changes[i - 1].StartOfTokenColumn + Changes[i - 1].TokenLength; - - Changes[i - 1].IsTrailingComment = - (Changes[i].NewlinesBefore > 0 || Changes[i].Tok->is(tok::eof) || - (Changes[i].IsInsideToken && Changes[i].Tok->is(tok::comment))) && - Changes[i - 1].Tok->is(tok::comment) && - // FIXME: This is a dirty hack. The problem is that - // BreakableLineCommentSection does comment reflow changes and here is - // the aligning of trailing comments. Consider the case where we reflow - // the second line up in this example: - // - // // line 1 - // // line 2 - // - // That amounts to 2 changes by BreakableLineCommentSection: - // - the first, delimited by (), for the whitespace between the tokens, - // - and second, delimited by [], for the whitespace at the beginning - // of the second token: - // - // // line 1( - // )[// ]line 2 - // - // So in the end we have two changes like this: - // - // // line1()[ ]line 2 - // - // Note that the OriginalWhitespaceStart of the second change is the - // same as the PreviousOriginalWhitespaceEnd of the first change. - // In this case, the below check ensures that the second change doesn't - // get treated as a trailing comment change here, since this might - // trigger additional whitespace to be wrongly inserted before "line 2" - // by the comment aligner here. - // - // For a proper solution we need a mechanism to say to WhitespaceManager - // that a particular change breaks the current sequence of trailing - // comments. - OriginalWhitespaceStart != PreviousOriginalWhitespaceEnd; - } - // FIXME: The last token is currently not always an eof token; in those - // cases, setting TokenLength of the last token to 0 is wrong. - Changes.back().TokenLength = 0; - Changes.back().IsTrailingComment = Changes.back().Tok->is(tok::comment); - - const WhitespaceManager::Change *LastBlockComment = nullptr; - for (auto &Change : Changes) { - // Reset the IsTrailingComment flag for changes inside of trailing comments - // so they don't get realigned later. Comment line breaks however still need - // to be aligned. - if (Change.IsInsideToken && Change.NewlinesBefore == 0) - Change.IsTrailingComment = false; - Change.StartOfBlockComment = nullptr; - Change.IndentationOffset = 0; - if (Change.Tok->is(tok::comment)) { - if (Change.Tok->is(TT_LineComment) || !Change.IsInsideToken) - LastBlockComment = &Change; - else { - if ((Change.StartOfBlockComment = LastBlockComment)) - Change.IndentationOffset = - Change.StartOfTokenColumn - - Change.StartOfBlockComment->StartOfTokenColumn; - } - } else { - LastBlockComment = nullptr; - } - } -} - -// Align a single sequence of tokens, see AlignTokens below. -template <typename F> -static void -AlignTokenSequence(unsigned Start, unsigned End, unsigned Column, F &&Matches, - SmallVector<WhitespaceManager::Change, 16> &Changes) { - bool FoundMatchOnLine = false; - int Shift = 0; - - // ScopeStack keeps track of the current scope depth. It contains indices of - // the first token on each scope. - // We only run the "Matches" function on tokens from the outer-most scope. - // However, we do need to pay special attention to one class of tokens - // that are not in the outer-most scope, and that is function parameters - // which are split across multiple lines, as illustrated by this example: - // double a(int x); - // int b(int y, - // double z); - // In the above example, we need to take special care to ensure that - // 'double z' is indented along with it's owning function 'b'. - SmallVector<unsigned, 16> ScopeStack; - - for (unsigned i = Start; i != End; ++i) { - if (ScopeStack.size() != 0 && - Changes[i].indentAndNestingLevel() < - Changes[ScopeStack.back()].indentAndNestingLevel()) - ScopeStack.pop_back(); - - // Compare current token to previous non-comment token to ensure whether - // it is in a deeper scope or not. - unsigned PreviousNonComment = i - 1; - while (PreviousNonComment > Start && - Changes[PreviousNonComment].Tok->is(tok::comment)) - PreviousNonComment--; - if (i != Start && Changes[i].indentAndNestingLevel() > - Changes[PreviousNonComment].indentAndNestingLevel()) - ScopeStack.push_back(i); - - bool InsideNestedScope = ScopeStack.size() != 0; - - if (Changes[i].NewlinesBefore > 0 && !InsideNestedScope) { - Shift = 0; - FoundMatchOnLine = false; - } - - // If this is the first matching token to be aligned, remember by how many - // spaces it has to be shifted, so the rest of the changes on the line are - // shifted by the same amount - if (!FoundMatchOnLine && !InsideNestedScope && Matches(Changes[i])) { - FoundMatchOnLine = true; - Shift = Column - Changes[i].StartOfTokenColumn; - Changes[i].Spaces += Shift; - } - - // This is for function parameters that are split across multiple lines, - // as mentioned in the ScopeStack comment. - if (InsideNestedScope && Changes[i].NewlinesBefore > 0) { - unsigned ScopeStart = ScopeStack.back(); - if (Changes[ScopeStart - 1].Tok->is(TT_FunctionDeclarationName) || - (ScopeStart > Start + 1 && - Changes[ScopeStart - 2].Tok->is(TT_FunctionDeclarationName))) - Changes[i].Spaces += Shift; - } - - assert(Shift >= 0); - Changes[i].StartOfTokenColumn += Shift; - if (i + 1 != Changes.size()) - Changes[i + 1].PreviousEndOfTokenColumn += Shift; - } -} - -// Walk through a subset of the changes, starting at StartAt, and find -// sequences of matching tokens to align. To do so, keep track of the lines and -// whether or not a matching token was found on a line. If a matching token is -// found, extend the current sequence. If the current line cannot be part of a -// sequence, e.g. because there is an empty line before it or it contains only -// non-matching tokens, finalize the previous sequence. -// The value returned is the token on which we stopped, either because we -// exhausted all items inside Changes, or because we hit a scope level higher -// than our initial scope. -// This function is recursive. Each invocation processes only the scope level -// equal to the initial level, which is the level of Changes[StartAt]. -// If we encounter a scope level greater than the initial level, then we call -// ourselves recursively, thereby avoiding the pollution of the current state -// with the alignment requirements of the nested sub-level. This recursive -// behavior is necessary for aligning function prototypes that have one or more -// arguments. -// If this function encounters a scope level less than the initial level, -// it returns the current position. -// There is a non-obvious subtlety in the recursive behavior: Even though we -// defer processing of nested levels to recursive invocations of this -// function, when it comes time to align a sequence of tokens, we run the -// alignment on the entire sequence, including the nested levels. -// When doing so, most of the nested tokens are skipped, because their -// alignment was already handled by the recursive invocations of this function. -// However, the special exception is that we do NOT skip function parameters -// that are split across multiple lines. See the test case in FormatTest.cpp -// that mentions "split function parameter alignment" for an example of this. -template <typename F> -static unsigned AlignTokens(const FormatStyle &Style, F &&Matches, - SmallVector<WhitespaceManager::Change, 16> &Changes, - unsigned StartAt) { - unsigned MinColumn = 0; - unsigned MaxColumn = UINT_MAX; - - // Line number of the start and the end of the current token sequence. - unsigned StartOfSequence = 0; - unsigned EndOfSequence = 0; - - // Measure the scope level (i.e. depth of (), [], {}) of the first token, and - // abort when we hit any token in a higher scope than the starting one. - auto IndentAndNestingLevel = StartAt < Changes.size() - ? Changes[StartAt].indentAndNestingLevel() - : std::pair<unsigned, unsigned>(0, 0); - - // Keep track of the number of commas before the matching tokens, we will only - // align a sequence of matching tokens if they are preceded by the same number - // of commas. - unsigned CommasBeforeLastMatch = 0; - unsigned CommasBeforeMatch = 0; - - // Whether a matching token has been found on the current line. - bool FoundMatchOnLine = false; - - // Aligns a sequence of matching tokens, on the MinColumn column. - // - // Sequences start from the first matching token to align, and end at the - // first token of the first line that doesn't need to be aligned. - // - // We need to adjust the StartOfTokenColumn of each Change that is on a line - // containing any matching token to be aligned and located after such token. - auto AlignCurrentSequence = [&] { - if (StartOfSequence > 0 && StartOfSequence < EndOfSequence) - AlignTokenSequence(StartOfSequence, EndOfSequence, MinColumn, Matches, - Changes); - MinColumn = 0; - MaxColumn = UINT_MAX; - StartOfSequence = 0; - EndOfSequence = 0; - }; - - unsigned i = StartAt; - for (unsigned e = Changes.size(); i != e; ++i) { - if (Changes[i].indentAndNestingLevel() < IndentAndNestingLevel) - break; - - if (Changes[i].NewlinesBefore != 0) { - CommasBeforeMatch = 0; - EndOfSequence = i; - // If there is a blank line, or if the last line didn't contain any - // matching token, the sequence ends here. - if (Changes[i].NewlinesBefore > 1 || !FoundMatchOnLine) - AlignCurrentSequence(); - - FoundMatchOnLine = false; - } - - if (Changes[i].Tok->is(tok::comma)) { - ++CommasBeforeMatch; - } else if (Changes[i].indentAndNestingLevel() > IndentAndNestingLevel) { - // Call AlignTokens recursively, skipping over this scope block. - unsigned StoppedAt = AlignTokens(Style, Matches, Changes, i); - i = StoppedAt - 1; - continue; - } - - if (!Matches(Changes[i])) - continue; - - // If there is more than one matching token per line, or if the number of - // preceding commas, do not match anymore, end the sequence. - if (FoundMatchOnLine || CommasBeforeMatch != CommasBeforeLastMatch) - AlignCurrentSequence(); - - CommasBeforeLastMatch = CommasBeforeMatch; - FoundMatchOnLine = true; - - if (StartOfSequence == 0) - StartOfSequence = i; - - unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn; - int LineLengthAfter = -Changes[i].Spaces; - for (unsigned j = i; j != e && Changes[j].NewlinesBefore == 0; ++j) - LineLengthAfter += Changes[j].Spaces + Changes[j].TokenLength; - unsigned ChangeMaxColumn = Style.ColumnLimit - LineLengthAfter; - - // If we are restricted by the maximum column width, end the sequence. - if (ChangeMinColumn > MaxColumn || ChangeMaxColumn < MinColumn || - CommasBeforeLastMatch != CommasBeforeMatch) { - AlignCurrentSequence(); - StartOfSequence = i; - } - - MinColumn = std::max(MinColumn, ChangeMinColumn); - MaxColumn = std::min(MaxColumn, ChangeMaxColumn); - } - - EndOfSequence = i; - AlignCurrentSequence(); - return i; -} - -void WhitespaceManager::alignConsecutiveAssignments() { - if (!Style.AlignConsecutiveAssignments) - return; - - AlignTokens(Style, - [&](const Change &C) { - // Do not align on equal signs that are first on a line. - if (C.NewlinesBefore > 0) - return false; - - // Do not align on equal signs that are last on a line. - if (&C != &Changes.back() && (&C + 1)->NewlinesBefore > 0) - return false; - - return C.Tok->is(tok::equal); - }, - Changes, /*StartAt=*/0); -} - -void WhitespaceManager::alignConsecutiveDeclarations() { - if (!Style.AlignConsecutiveDeclarations) - return; - - // FIXME: Currently we don't handle properly the PointerAlignment: Right - // The * and & are not aligned and are left dangling. Something has to be done - // about it, but it raises the question of alignment of code like: - // const char* const* v1; - // float const* v2; - // SomeVeryLongType const& v3; - AlignTokens(Style, - [](Change const &C) { - // tok::kw_operator is necessary for aligning operator overload - // definitions. - return C.Tok->is(TT_StartOfName) || - C.Tok->is(TT_FunctionDeclarationName) || - C.Tok->is(tok::kw_operator); - }, - Changes, /*StartAt=*/0); -} - -void WhitespaceManager::alignTrailingComments() { - unsigned MinColumn = 0; - unsigned MaxColumn = UINT_MAX; - unsigned StartOfSequence = 0; - bool BreakBeforeNext = false; - unsigned Newlines = 0; - for (unsigned i = 0, e = Changes.size(); i != e; ++i) { - if (Changes[i].StartOfBlockComment) - continue; - Newlines += Changes[i].NewlinesBefore; - if (!Changes[i].IsTrailingComment) - continue; - - unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn; - unsigned ChangeMaxColumn; - - if (Style.ColumnLimit == 0) - ChangeMaxColumn = UINT_MAX; - else if (Style.ColumnLimit >= Changes[i].TokenLength) - ChangeMaxColumn = Style.ColumnLimit - Changes[i].TokenLength; - else - ChangeMaxColumn = ChangeMinColumn; - - // If we don't create a replacement for this change, we have to consider - // it to be immovable. - if (!Changes[i].CreateReplacement) - ChangeMaxColumn = ChangeMinColumn; - - if (i + 1 != e && Changes[i + 1].ContinuesPPDirective) - ChangeMaxColumn -= 2; - // If this comment follows an } in column 0, it probably documents the - // closing of a namespace and we don't want to align it. - bool FollowsRBraceInColumn0 = i > 0 && Changes[i].NewlinesBefore == 0 && - Changes[i - 1].Tok->is(tok::r_brace) && - Changes[i - 1].StartOfTokenColumn == 0; - bool WasAlignedWithStartOfNextLine = false; - if (Changes[i].NewlinesBefore == 1) { // A comment on its own line. - unsigned CommentColumn = SourceMgr.getSpellingColumnNumber( - Changes[i].OriginalWhitespaceRange.getEnd()); - for (unsigned j = i + 1; j != e; ++j) { - if (Changes[j].Tok->is(tok::comment)) - continue; - - unsigned NextColumn = SourceMgr.getSpellingColumnNumber( - Changes[j].OriginalWhitespaceRange.getEnd()); - // The start of the next token was previously aligned with the - // start of this comment. - WasAlignedWithStartOfNextLine = - CommentColumn == NextColumn || - CommentColumn == NextColumn + Style.IndentWidth; - break; - } - } - if (!Style.AlignTrailingComments || FollowsRBraceInColumn0) { - alignTrailingComments(StartOfSequence, i, MinColumn); - MinColumn = ChangeMinColumn; - MaxColumn = ChangeMinColumn; - StartOfSequence = i; - } else if (BreakBeforeNext || Newlines > 1 || - (ChangeMinColumn > MaxColumn || ChangeMaxColumn < MinColumn) || - // Break the comment sequence if the previous line did not end - // in a trailing comment. - (Changes[i].NewlinesBefore == 1 && i > 0 && - !Changes[i - 1].IsTrailingComment) || - WasAlignedWithStartOfNextLine) { - alignTrailingComments(StartOfSequence, i, MinColumn); - MinColumn = ChangeMinColumn; - MaxColumn = ChangeMaxColumn; - StartOfSequence = i; - } else { - MinColumn = std::max(MinColumn, ChangeMinColumn); - MaxColumn = std::min(MaxColumn, ChangeMaxColumn); - } - BreakBeforeNext = - (i == 0) || (Changes[i].NewlinesBefore > 1) || - // Never start a sequence with a comment at the beginning of - // the line. - (Changes[i].NewlinesBefore == 1 && StartOfSequence == i); - Newlines = 0; - } - alignTrailingComments(StartOfSequence, Changes.size(), MinColumn); -} - -void WhitespaceManager::alignTrailingComments(unsigned Start, unsigned End, - unsigned Column) { - for (unsigned i = Start; i != End; ++i) { - int Shift = 0; - if (Changes[i].IsTrailingComment) { - Shift = Column - Changes[i].StartOfTokenColumn; - } - if (Changes[i].StartOfBlockComment) { - Shift = Changes[i].IndentationOffset + - Changes[i].StartOfBlockComment->StartOfTokenColumn - - Changes[i].StartOfTokenColumn; - } - assert(Shift >= 0); - Changes[i].Spaces += Shift; - if (i + 1 != Changes.size()) - Changes[i + 1].PreviousEndOfTokenColumn += Shift; - Changes[i].StartOfTokenColumn += Shift; - } -} - -void WhitespaceManager::alignEscapedNewlines() { - if (Style.AlignEscapedNewlines == FormatStyle::ENAS_DontAlign) - return; - - bool AlignLeft = Style.AlignEscapedNewlines == FormatStyle::ENAS_Left; - unsigned MaxEndOfLine = AlignLeft ? 0 : Style.ColumnLimit; - unsigned StartOfMacro = 0; - for (unsigned i = 1, e = Changes.size(); i < e; ++i) { - Change &C = Changes[i]; - if (C.NewlinesBefore > 0) { - if (C.ContinuesPPDirective) { - MaxEndOfLine = std::max(C.PreviousEndOfTokenColumn + 2, MaxEndOfLine); - } else { - alignEscapedNewlines(StartOfMacro + 1, i, MaxEndOfLine); - MaxEndOfLine = AlignLeft ? 0 : Style.ColumnLimit; - StartOfMacro = i; - } - } - } - alignEscapedNewlines(StartOfMacro + 1, Changes.size(), MaxEndOfLine); -} - -void WhitespaceManager::alignEscapedNewlines(unsigned Start, unsigned End, - unsigned Column) { - for (unsigned i = Start; i < End; ++i) { - Change &C = Changes[i]; - if (C.NewlinesBefore > 0) { - assert(C.ContinuesPPDirective); - if (C.PreviousEndOfTokenColumn + 1 > Column) - C.EscapedNewlineColumn = 0; - else - C.EscapedNewlineColumn = Column; - } - } -} - -void WhitespaceManager::generateChanges() { - for (unsigned i = 0, e = Changes.size(); i != e; ++i) { - const Change &C = Changes[i]; - if (i > 0) { - assert(Changes[i - 1].OriginalWhitespaceRange.getBegin() != - C.OriginalWhitespaceRange.getBegin() && - "Generating two replacements for the same location"); - } - if (C.CreateReplacement) { - std::string ReplacementText = C.PreviousLinePostfix; - if (C.ContinuesPPDirective) - appendEscapedNewlineText(ReplacementText, C.NewlinesBefore, - C.PreviousEndOfTokenColumn, - C.EscapedNewlineColumn); - else - appendNewlineText(ReplacementText, C.NewlinesBefore); - appendIndentText(ReplacementText, C.Tok->IndentLevel, - std::max(0, C.Spaces), - C.StartOfTokenColumn - std::max(0, C.Spaces)); - ReplacementText.append(C.CurrentLinePrefix); - storeReplacement(C.OriginalWhitespaceRange, ReplacementText); - } - } -} - -void WhitespaceManager::storeReplacement(SourceRange Range, StringRef Text) { - unsigned WhitespaceLength = SourceMgr.getFileOffset(Range.getEnd()) - - SourceMgr.getFileOffset(Range.getBegin()); - // Don't create a replacement, if it does not change anything. - if (StringRef(SourceMgr.getCharacterData(Range.getBegin()), - WhitespaceLength) == Text) - return; - auto Err = Replaces.add(tooling::Replacement( - SourceMgr, CharSourceRange::getCharRange(Range), Text)); - // FIXME: better error handling. For now, just print an error message in the - // release version. - if (Err) { - llvm::errs() << llvm::toString(std::move(Err)) << "\n"; - assert(false); - } -} - -void WhitespaceManager::appendNewlineText(std::string &Text, - unsigned Newlines) { - for (unsigned i = 0; i < Newlines; ++i) - Text.append(UseCRLF ? "\r\n" : "\n"); -} - -void WhitespaceManager::appendEscapedNewlineText( - std::string &Text, unsigned Newlines, unsigned PreviousEndOfTokenColumn, - unsigned EscapedNewlineColumn) { - if (Newlines > 0) { - unsigned Spaces = - std::max<int>(1, EscapedNewlineColumn - PreviousEndOfTokenColumn - 1); - for (unsigned i = 0; i < Newlines; ++i) { - Text.append(Spaces, ' '); - Text.append(UseCRLF ? "\\\r\n" : "\\\n"); - Spaces = std::max<int>(0, EscapedNewlineColumn - 1); - } - } -} - -void WhitespaceManager::appendIndentText(std::string &Text, - unsigned IndentLevel, unsigned Spaces, - unsigned WhitespaceStartColumn) { - switch (Style.UseTab) { - case FormatStyle::UT_Never: - Text.append(Spaces, ' '); - break; - case FormatStyle::UT_Always: { - unsigned FirstTabWidth = - Style.TabWidth - WhitespaceStartColumn % Style.TabWidth; - // Indent with tabs only when there's at least one full tab. - if (FirstTabWidth + Style.TabWidth <= Spaces) { - Spaces -= FirstTabWidth; - Text.append("\t"); - } - Text.append(Spaces / Style.TabWidth, '\t'); - Text.append(Spaces % Style.TabWidth, ' '); - break; - } - case FormatStyle::UT_ForIndentation: - if (WhitespaceStartColumn == 0) { - unsigned Indentation = IndentLevel * Style.IndentWidth; - // This happens, e.g. when a line in a block comment is indented less than - // the first one. - if (Indentation > Spaces) - Indentation = Spaces; - unsigned Tabs = Indentation / Style.TabWidth; - Text.append(Tabs, '\t'); - Spaces -= Tabs * Style.TabWidth; - } - Text.append(Spaces, ' '); - break; - case FormatStyle::UT_ForContinuationAndIndentation: - if (WhitespaceStartColumn == 0) { - unsigned Tabs = Spaces / Style.TabWidth; - Text.append(Tabs, '\t'); - Spaces -= Tabs * Style.TabWidth; - } - Text.append(Spaces, ' '); - break; - } -} - -} // namespace format -} // namespace clang diff --git a/gnu/llvm/tools/clang/lib/Format/WhitespaceManager.h b/gnu/llvm/tools/clang/lib/Format/WhitespaceManager.h deleted file mode 100644 index db90343f729..00000000000 --- a/gnu/llvm/tools/clang/lib/Format/WhitespaceManager.h +++ /dev/null @@ -1,215 +0,0 @@ -//===--- WhitespaceManager.h - Format C++ code ------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// WhitespaceManager class manages whitespace around tokens and their -/// replacements. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H -#define LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H - -#include "TokenAnnotator.h" -#include "clang/Basic/SourceManager.h" -#include "clang/Format/Format.h" -#include <string> - -namespace clang { -namespace format { - -/// Manages the whitespaces around tokens and their replacements. -/// -/// This includes special handling for certain constructs, e.g. the alignment of -/// trailing line comments. -/// -/// To guarantee correctness of alignment operations, the \c WhitespaceManager -/// must be informed about every token in the source file; for each token, there -/// must be exactly one call to either \c replaceWhitespace or -/// \c addUntouchableToken. -/// -/// There may be multiple calls to \c breakToken for a given token. -class WhitespaceManager { -public: - WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style, - bool UseCRLF) - : SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {} - - /// Replaces the whitespace in front of \p Tok. Only call once for - /// each \c AnnotatedToken. - /// - /// \p StartOfTokenColumn is the column at which the token will start after - /// this replacement. It is needed for determining how \p Spaces is turned - /// into tabs and spaces for some format styles. - void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces, - unsigned StartOfTokenColumn, - bool InPPDirective = false); - - /// Adds information about an unchangeable token's whitespace. - /// - /// Needs to be called for every token for which \c replaceWhitespace - /// was not called. - void addUntouchableToken(const FormatToken &Tok, bool InPPDirective); - - llvm::Error addReplacement(const tooling::Replacement &Replacement); - - /// Inserts or replaces whitespace in the middle of a token. - /// - /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix - /// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars - /// characters. - /// - /// Note: \p Spaces can be negative to retain information about initial - /// relative column offset between a line of a block comment and the start of - /// the comment. This negative offset may be compensated by trailing comment - /// alignment here. In all other cases negative \p Spaces will be truncated to - /// 0. - /// - /// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is - /// used to align backslashes correctly. - void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset, - unsigned ReplaceChars, - StringRef PreviousPostfix, - StringRef CurrentPrefix, bool InPPDirective, - unsigned Newlines, int Spaces); - - /// Returns all the \c Replacements created during formatting. - const tooling::Replacements &generateReplacements(); - - /// Represents a change before a token, a break inside a token, - /// or the layout of an unchanged token (or whitespace within). - struct Change { - /// Functor to sort changes in original source order. - class IsBeforeInFile { - public: - IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {} - bool operator()(const Change &C1, const Change &C2) const; - - private: - const SourceManager &SourceMgr; - }; - - /// Creates a \c Change. - /// - /// The generated \c Change will replace the characters at - /// \p OriginalWhitespaceRange with a concatenation of - /// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces - /// and \p CurrentLinePrefix. - /// - /// \p StartOfTokenColumn and \p InPPDirective will be used to lay out - /// trailing comments and escaped newlines. - Change(const FormatToken &Tok, bool CreateReplacement, - SourceRange OriginalWhitespaceRange, int Spaces, - unsigned StartOfTokenColumn, unsigned NewlinesBefore, - StringRef PreviousLinePostfix, StringRef CurrentLinePrefix, - bool ContinuesPPDirective, bool IsInsideToken); - - // The kind of the token whose whitespace this change replaces, or in which - // this change inserts whitespace. - // FIXME: Currently this is not set correctly for breaks inside comments, as - // the \c BreakableToken is still doing its own alignment. - const FormatToken *Tok; - - bool CreateReplacement; - // Changes might be in the middle of a token, so we cannot just keep the - // FormatToken around to query its information. - SourceRange OriginalWhitespaceRange; - unsigned StartOfTokenColumn; - unsigned NewlinesBefore; - std::string PreviousLinePostfix; - std::string CurrentLinePrefix; - bool ContinuesPPDirective; - - // The number of spaces in front of the token or broken part of the token. - // This will be adapted when aligning tokens. - // Can be negative to retain information about the initial relative offset - // of the lines in a block comment. This is used when aligning trailing - // comments. Uncompensated negative offset is truncated to 0. - int Spaces; - - // If this change is inside of a token but not at the start of the token or - // directly after a newline. - bool IsInsideToken; - - // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and - // \c EscapedNewlineColumn will be calculated in - // \c calculateLineBreakInformation. - bool IsTrailingComment; - unsigned TokenLength; - unsigned PreviousEndOfTokenColumn; - unsigned EscapedNewlineColumn; - - // These fields are used to retain correct relative line indentation in a - // block comment when aligning trailing comments. - // - // If this Change represents a continuation of a block comment, - // \c StartOfBlockComment is pointer to the first Change in the block - // comment. \c IndentationOffset is a relative column offset to this - // change, so that the correct column can be reconstructed at the end of - // the alignment process. - const Change *StartOfBlockComment; - int IndentationOffset; - - // A combination of indent level and nesting level, which are used in - // tandem to compute lexical scope, for the purposes of deciding - // when to stop consecutive alignment runs. - std::pair<unsigned, unsigned> indentAndNestingLevel() const { - return std::make_pair(Tok->IndentLevel, Tok->NestingLevel); - } - }; - -private: - /// Calculate \c IsTrailingComment, \c TokenLength for the last tokens - /// or token parts in a line and \c PreviousEndOfTokenColumn and - /// \c EscapedNewlineColumn for the first tokens or token parts in a line. - void calculateLineBreakInformation(); - - /// Align consecutive assignments over all \c Changes. - void alignConsecutiveAssignments(); - - /// Align consecutive declarations over all \c Changes. - void alignConsecutiveDeclarations(); - - /// Align trailing comments over all \c Changes. - void alignTrailingComments(); - - /// Align trailing comments from change \p Start to change \p End at - /// the specified \p Column. - void alignTrailingComments(unsigned Start, unsigned End, unsigned Column); - - /// Align escaped newlines over all \c Changes. - void alignEscapedNewlines(); - - /// Align escaped newlines from change \p Start to change \p End at - /// the specified \p Column. - void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column); - - /// Fill \c Replaces with the replacements for all effective changes. - void generateChanges(); - - /// Stores \p Text as the replacement for the whitespace in \p Range. - void storeReplacement(SourceRange Range, StringRef Text); - void appendNewlineText(std::string &Text, unsigned Newlines); - void appendEscapedNewlineText(std::string &Text, unsigned Newlines, - unsigned PreviousEndOfTokenColumn, - unsigned EscapedNewlineColumn); - void appendIndentText(std::string &Text, unsigned IndentLevel, - unsigned Spaces, unsigned WhitespaceStartColumn); - - SmallVector<Change, 16> Changes; - const SourceManager &SourceMgr; - tooling::Replacements Replaces; - const FormatStyle &Style; - bool UseCRLF; -}; - -} // namespace format -} // namespace clang - -#endif |
