summaryrefslogtreecommitdiffstats
path: root/gnu/llvm/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/llvm/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp')
-rw-r--r--gnu/llvm/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp660
1 files changed, 660 insertions, 0 deletions
diff --git a/gnu/llvm/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp b/gnu/llvm/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp
new file mode 100644
index 00000000000..932db17b964
--- /dev/null
+++ b/gnu/llvm/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusNameParser.cpp
@@ -0,0 +1,660 @@
+//===-- CPlusPlusNameParser.cpp ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "CPlusPlusNameParser.h"
+
+#include "clang/Basic/IdentifierTable.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/Threading.h"
+
+using namespace lldb;
+using namespace lldb_private;
+using llvm::Optional;
+using llvm::None;
+using ParsedFunction = lldb_private::CPlusPlusNameParser::ParsedFunction;
+using ParsedName = lldb_private::CPlusPlusNameParser::ParsedName;
+namespace tok = clang::tok;
+
+Optional<ParsedFunction> CPlusPlusNameParser::ParseAsFunctionDefinition() {
+ m_next_token_index = 0;
+ Optional<ParsedFunction> result(None);
+
+ // Try to parse the name as function without a return type specified e.g.
+ // main(int, char*[])
+ {
+ Bookmark start_position = SetBookmark();
+ result = ParseFunctionImpl(false);
+ if (result && !HasMoreTokens())
+ return result;
+ }
+
+ // Try to parse the name as function with function pointer return type e.g.
+ // void (*get_func(const char*))()
+ result = ParseFuncPtr(true);
+ if (result)
+ return result;
+
+ // Finally try to parse the name as a function with non-function return type
+ // e.g. int main(int, char*[])
+ result = ParseFunctionImpl(true);
+ if (HasMoreTokens())
+ return None;
+ return result;
+}
+
+Optional<ParsedName> CPlusPlusNameParser::ParseAsFullName() {
+ m_next_token_index = 0;
+ Optional<ParsedNameRanges> name_ranges = ParseFullNameImpl();
+ if (!name_ranges)
+ return None;
+ if (HasMoreTokens())
+ return None;
+ ParsedName result;
+ result.basename = GetTextForRange(name_ranges.getValue().basename_range);
+ result.context = GetTextForRange(name_ranges.getValue().context_range);
+ return result;
+}
+
+bool CPlusPlusNameParser::HasMoreTokens() {
+ return m_next_token_index < m_tokens.size();
+}
+
+void CPlusPlusNameParser::Advance() { ++m_next_token_index; }
+
+void CPlusPlusNameParser::TakeBack() { --m_next_token_index; }
+
+bool CPlusPlusNameParser::ConsumeToken(tok::TokenKind kind) {
+ if (!HasMoreTokens())
+ return false;
+
+ if (!Peek().is(kind))
+ return false;
+
+ Advance();
+ return true;
+}
+
+template <typename... Ts> bool CPlusPlusNameParser::ConsumeToken(Ts... kinds) {
+ if (!HasMoreTokens())
+ return false;
+
+ if (!Peek().isOneOf(kinds...))
+ return false;
+
+ Advance();
+ return true;
+}
+
+CPlusPlusNameParser::Bookmark CPlusPlusNameParser::SetBookmark() {
+ return Bookmark(m_next_token_index);
+}
+
+size_t CPlusPlusNameParser::GetCurrentPosition() { return m_next_token_index; }
+
+clang::Token &CPlusPlusNameParser::Peek() {
+ assert(HasMoreTokens());
+ return m_tokens[m_next_token_index];
+}
+
+Optional<ParsedFunction>
+CPlusPlusNameParser::ParseFunctionImpl(bool expect_return_type) {
+ Bookmark start_position = SetBookmark();
+ if (expect_return_type) {
+ // Consume return type if it's expected.
+ if (!ConsumeTypename())
+ return None;
+ }
+
+ auto maybe_name = ParseFullNameImpl();
+ if (!maybe_name) {
+ return None;
+ }
+
+ size_t argument_start = GetCurrentPosition();
+ if (!ConsumeArguments()) {
+ return None;
+ }
+
+ size_t qualifiers_start = GetCurrentPosition();
+ SkipFunctionQualifiers();
+ size_t end_position = GetCurrentPosition();
+
+ ParsedFunction result;
+ result.name.basename = GetTextForRange(maybe_name.getValue().basename_range);
+ result.name.context = GetTextForRange(maybe_name.getValue().context_range);
+ result.arguments = GetTextForRange(Range(argument_start, qualifiers_start));
+ result.qualifiers = GetTextForRange(Range(qualifiers_start, end_position));
+ start_position.Remove();
+ return result;
+}
+
+Optional<ParsedFunction>
+CPlusPlusNameParser::ParseFuncPtr(bool expect_return_type) {
+ Bookmark start_position = SetBookmark();
+ if (expect_return_type) {
+ // Consume return type.
+ if (!ConsumeTypename())
+ return None;
+ }
+
+ if (!ConsumeToken(tok::l_paren))
+ return None;
+ if (!ConsumePtrsAndRefs())
+ return None;
+
+ {
+ Bookmark before_inner_function_pos = SetBookmark();
+ auto maybe_inner_function_name = ParseFunctionImpl(false);
+ if (maybe_inner_function_name)
+ if (ConsumeToken(tok::r_paren))
+ if (ConsumeArguments()) {
+ SkipFunctionQualifiers();
+ start_position.Remove();
+ before_inner_function_pos.Remove();
+ return maybe_inner_function_name;
+ }
+ }
+
+ auto maybe_inner_function_ptr_name = ParseFuncPtr(false);
+ if (maybe_inner_function_ptr_name)
+ if (ConsumeToken(tok::r_paren))
+ if (ConsumeArguments()) {
+ SkipFunctionQualifiers();
+ start_position.Remove();
+ return maybe_inner_function_ptr_name;
+ }
+ return None;
+}
+
+bool CPlusPlusNameParser::ConsumeArguments() {
+ return ConsumeBrackets(tok::l_paren, tok::r_paren);
+}
+
+bool CPlusPlusNameParser::ConsumeTemplateArgs() {
+ Bookmark start_position = SetBookmark();
+ if (!HasMoreTokens() || Peek().getKind() != tok::less)
+ return false;
+ Advance();
+
+ // Consuming template arguments is a bit trickier than consuming function
+ // arguments, because '<' '>' brackets are not always trivially balanced. In
+ // some rare cases tokens '<' and '>' can appear inside template arguments as
+ // arithmetic or shift operators not as template brackets. Examples:
+ // std::enable_if<(10u)<(64), bool>
+ // f<A<operator<(X,Y)::Subclass>>
+ // Good thing that compiler makes sure that really ambiguous cases of '>'
+ // usage should be enclosed within '()' brackets.
+ int template_counter = 1;
+ bool can_open_template = false;
+ while (HasMoreTokens() && template_counter > 0) {
+ tok::TokenKind kind = Peek().getKind();
+ switch (kind) {
+ case tok::greatergreater:
+ template_counter -= 2;
+ can_open_template = false;
+ Advance();
+ break;
+ case tok::greater:
+ --template_counter;
+ can_open_template = false;
+ Advance();
+ break;
+ case tok::less:
+ // '<' is an attempt to open a subteamplte
+ // check if parser is at the point where it's actually possible,
+ // otherwise it's just a part of an expression like 'sizeof(T)<(10)'. No
+ // need to do the same for '>' because compiler actually makes sure that
+ // '>' always surrounded by brackets to avoid ambiguity.
+ if (can_open_template)
+ ++template_counter;
+ can_open_template = false;
+ Advance();
+ break;
+ case tok::kw_operator: // C++ operator overloading.
+ if (!ConsumeOperator())
+ return false;
+ can_open_template = true;
+ break;
+ case tok::raw_identifier:
+ can_open_template = true;
+ Advance();
+ break;
+ case tok::l_square:
+ if (!ConsumeBrackets(tok::l_square, tok::r_square))
+ return false;
+ can_open_template = false;
+ break;
+ case tok::l_paren:
+ if (!ConsumeArguments())
+ return false;
+ can_open_template = false;
+ break;
+ default:
+ can_open_template = false;
+ Advance();
+ break;
+ }
+ }
+
+ if (template_counter != 0) {
+ return false;
+ }
+ start_position.Remove();
+ return true;
+}
+
+bool CPlusPlusNameParser::ConsumeAnonymousNamespace() {
+ Bookmark start_position = SetBookmark();
+ if (!ConsumeToken(tok::l_paren)) {
+ return false;
+ }
+ constexpr llvm::StringLiteral g_anonymous("anonymous");
+ if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
+ Peek().getRawIdentifier() == g_anonymous) {
+ Advance();
+ } else {
+ return false;
+ }
+
+ if (!ConsumeToken(tok::kw_namespace)) {
+ return false;
+ }
+
+ if (!ConsumeToken(tok::r_paren)) {
+ return false;
+ }
+ start_position.Remove();
+ return true;
+}
+
+bool CPlusPlusNameParser::ConsumeLambda() {
+ Bookmark start_position = SetBookmark();
+ if (!ConsumeToken(tok::l_brace)) {
+ return false;
+ }
+ constexpr llvm::StringLiteral g_lambda("lambda");
+ if (HasMoreTokens() && Peek().is(tok::raw_identifier) &&
+ Peek().getRawIdentifier() == g_lambda) {
+ // Put the matched brace back so we can use ConsumeBrackets
+ TakeBack();
+ } else {
+ return false;
+ }
+
+ if (!ConsumeBrackets(tok::l_brace, tok::r_brace)) {
+ return false;
+ }
+
+ start_position.Remove();
+ return true;
+}
+
+bool CPlusPlusNameParser::ConsumeBrackets(tok::TokenKind left,
+ tok::TokenKind right) {
+ Bookmark start_position = SetBookmark();
+ if (!HasMoreTokens() || Peek().getKind() != left)
+ return false;
+ Advance();
+
+ int counter = 1;
+ while (HasMoreTokens() && counter > 0) {
+ tok::TokenKind kind = Peek().getKind();
+ if (kind == right)
+ --counter;
+ else if (kind == left)
+ ++counter;
+ Advance();
+ }
+
+ assert(counter >= 0);
+ if (counter > 0) {
+ return false;
+ }
+ start_position.Remove();
+ return true;
+}
+
+bool CPlusPlusNameParser::ConsumeOperator() {
+ Bookmark start_position = SetBookmark();
+ if (!ConsumeToken(tok::kw_operator))
+ return false;
+
+ if (!HasMoreTokens()) {
+ return false;
+ }
+
+ const auto &token = Peek();
+ switch (token.getKind()) {
+ case tok::kw_new:
+ case tok::kw_delete:
+ // This is 'new' or 'delete' operators.
+ Advance();
+ // Check for array new/delete.
+ if (HasMoreTokens() && Peek().is(tok::l_square)) {
+ // Consume the '[' and ']'.
+ if (!ConsumeBrackets(tok::l_square, tok::r_square))
+ return false;
+ }
+ break;
+
+#define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemberOnly) \
+ case tok::Token: \
+ Advance(); \
+ break;
+#define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemberOnly)
+#include "clang/Basic/OperatorKinds.def"
+#undef OVERLOADED_OPERATOR
+#undef OVERLOADED_OPERATOR_MULTI
+
+ case tok::l_paren:
+ // Call operator consume '(' ... ')'.
+ if (ConsumeBrackets(tok::l_paren, tok::r_paren))
+ break;
+ return false;
+
+ case tok::l_square:
+ // This is a [] operator.
+ // Consume the '[' and ']'.
+ if (ConsumeBrackets(tok::l_square, tok::r_square))
+ break;
+ return false;
+
+ default:
+ // This might be a cast operator.
+ if (ConsumeTypename())
+ break;
+ return false;
+ }
+ start_position.Remove();
+ return true;
+}
+
+void CPlusPlusNameParser::SkipTypeQualifiers() {
+ while (ConsumeToken(tok::kw_const, tok::kw_volatile))
+ ;
+}
+
+void CPlusPlusNameParser::SkipFunctionQualifiers() {
+ while (ConsumeToken(tok::kw_const, tok::kw_volatile, tok::amp, tok::ampamp))
+ ;
+}
+
+bool CPlusPlusNameParser::ConsumeBuiltinType() {
+ bool result = false;
+ bool continue_parsing = true;
+ // Built-in types can be made of a few keywords like 'unsigned long long
+ // int'. This function consumes all built-in type keywords without checking
+ // if they make sense like 'unsigned char void'.
+ while (continue_parsing && HasMoreTokens()) {
+ switch (Peek().getKind()) {
+ case tok::kw_short:
+ case tok::kw_long:
+ case tok::kw___int64:
+ case tok::kw___int128:
+ case tok::kw_signed:
+ case tok::kw_unsigned:
+ case tok::kw_void:
+ case tok::kw_char:
+ case tok::kw_int:
+ case tok::kw_half:
+ case tok::kw_float:
+ case tok::kw_double:
+ case tok::kw___float128:
+ case tok::kw_wchar_t:
+ case tok::kw_bool:
+ case tok::kw_char16_t:
+ case tok::kw_char32_t:
+ result = true;
+ Advance();
+ break;
+ default:
+ continue_parsing = false;
+ break;
+ }
+ }
+ return result;
+}
+
+void CPlusPlusNameParser::SkipPtrsAndRefs() {
+ // Ignoring result.
+ ConsumePtrsAndRefs();
+}
+
+bool CPlusPlusNameParser::ConsumePtrsAndRefs() {
+ bool found = false;
+ SkipTypeQualifiers();
+ while (ConsumeToken(tok::star, tok::amp, tok::ampamp, tok::kw_const,
+ tok::kw_volatile)) {
+ found = true;
+ SkipTypeQualifiers();
+ }
+ return found;
+}
+
+bool CPlusPlusNameParser::ConsumeDecltype() {
+ Bookmark start_position = SetBookmark();
+ if (!ConsumeToken(tok::kw_decltype))
+ return false;
+
+ if (!ConsumeArguments())
+ return false;
+
+ start_position.Remove();
+ return true;
+}
+
+bool CPlusPlusNameParser::ConsumeTypename() {
+ Bookmark start_position = SetBookmark();
+ SkipTypeQualifiers();
+ if (!ConsumeBuiltinType() && !ConsumeDecltype()) {
+ if (!ParseFullNameImpl())
+ return false;
+ }
+ SkipPtrsAndRefs();
+ start_position.Remove();
+ return true;
+}
+
+Optional<CPlusPlusNameParser::ParsedNameRanges>
+CPlusPlusNameParser::ParseFullNameImpl() {
+ // Name parsing state machine.
+ enum class State {
+ Beginning, // start of the name
+ AfterTwoColons, // right after ::
+ AfterIdentifier, // right after alphanumerical identifier ([a-z0-9_]+)
+ AfterTemplate, // right after template brackets (<something>)
+ AfterOperator, // right after name of C++ operator
+ };
+
+ Bookmark start_position = SetBookmark();
+ State state = State::Beginning;
+ bool continue_parsing = true;
+ Optional<size_t> last_coloncolon_position = None;
+
+ while (continue_parsing && HasMoreTokens()) {
+ const auto &token = Peek();
+ switch (token.getKind()) {
+ case tok::raw_identifier: // Just a name.
+ if (state != State::Beginning && state != State::AfterTwoColons) {
+ continue_parsing = false;
+ break;
+ }
+ Advance();
+ state = State::AfterIdentifier;
+ break;
+ case tok::l_paren: {
+ if (state == State::Beginning || state == State::AfterTwoColons) {
+ // (anonymous namespace)
+ if (ConsumeAnonymousNamespace()) {
+ state = State::AfterIdentifier;
+ break;
+ }
+ }
+
+ // Type declared inside a function 'func()::Type'
+ if (state != State::AfterIdentifier && state != State::AfterTemplate &&
+ state != State::AfterOperator) {
+ continue_parsing = false;
+ break;
+ }
+ Bookmark l_paren_position = SetBookmark();
+ // Consume the '(' ... ') [const]'.
+ if (!ConsumeArguments()) {
+ continue_parsing = false;
+ break;
+ }
+ SkipFunctionQualifiers();
+
+ // Consume '::'
+ size_t coloncolon_position = GetCurrentPosition();
+ if (!ConsumeToken(tok::coloncolon)) {
+ continue_parsing = false;
+ break;
+ }
+ l_paren_position.Remove();
+ last_coloncolon_position = coloncolon_position;
+ state = State::AfterTwoColons;
+ break;
+ }
+ case tok::l_brace:
+ if (state == State::Beginning || state == State::AfterTwoColons) {
+ if (ConsumeLambda()) {
+ state = State::AfterIdentifier;
+ break;
+ }
+ }
+ continue_parsing = false;
+ break;
+ case tok::coloncolon: // Type nesting delimiter.
+ if (state != State::Beginning && state != State::AfterIdentifier &&
+ state != State::AfterTemplate) {
+ continue_parsing = false;
+ break;
+ }
+ last_coloncolon_position = GetCurrentPosition();
+ Advance();
+ state = State::AfterTwoColons;
+ break;
+ case tok::less: // Template brackets.
+ if (state != State::AfterIdentifier && state != State::AfterOperator) {
+ continue_parsing = false;
+ break;
+ }
+ if (!ConsumeTemplateArgs()) {
+ continue_parsing = false;
+ break;
+ }
+ state = State::AfterTemplate;
+ break;
+ case tok::kw_operator: // C++ operator overloading.
+ if (state != State::Beginning && state != State::AfterTwoColons) {
+ continue_parsing = false;
+ break;
+ }
+ if (!ConsumeOperator()) {
+ continue_parsing = false;
+ break;
+ }
+ state = State::AfterOperator;
+ break;
+ case tok::tilde: // Destructor.
+ if (state != State::Beginning && state != State::AfterTwoColons) {
+ continue_parsing = false;
+ break;
+ }
+ Advance();
+ if (ConsumeToken(tok::raw_identifier)) {
+ state = State::AfterIdentifier;
+ } else {
+ TakeBack();
+ continue_parsing = false;
+ }
+ break;
+ default:
+ continue_parsing = false;
+ break;
+ }
+ }
+
+ if (state == State::AfterIdentifier || state == State::AfterOperator ||
+ state == State::AfterTemplate) {
+ ParsedNameRanges result;
+ if (last_coloncolon_position) {
+ result.context_range = Range(start_position.GetSavedPosition(),
+ last_coloncolon_position.getValue());
+ result.basename_range =
+ Range(last_coloncolon_position.getValue() + 1, GetCurrentPosition());
+ } else {
+ result.basename_range =
+ Range(start_position.GetSavedPosition(), GetCurrentPosition());
+ }
+ start_position.Remove();
+ return result;
+ } else {
+ return None;
+ }
+}
+
+llvm::StringRef CPlusPlusNameParser::GetTextForRange(const Range &range) {
+ if (range.empty())
+ return llvm::StringRef();
+ assert(range.begin_index < range.end_index);
+ assert(range.begin_index < m_tokens.size());
+ assert(range.end_index <= m_tokens.size());
+ clang::Token &first_token = m_tokens[range.begin_index];
+ clang::Token &last_token = m_tokens[range.end_index - 1];
+ clang::SourceLocation start_loc = first_token.getLocation();
+ clang::SourceLocation end_loc = last_token.getLocation();
+ unsigned start_pos = start_loc.getRawEncoding();
+ unsigned end_pos = end_loc.getRawEncoding() + last_token.getLength();
+ return m_text.take_front(end_pos).drop_front(start_pos);
+}
+
+static const clang::LangOptions &GetLangOptions() {
+ static clang::LangOptions g_options;
+ static llvm::once_flag g_once_flag;
+ llvm::call_once(g_once_flag, []() {
+ g_options.LineComment = true;
+ g_options.C99 = true;
+ g_options.C11 = true;
+ g_options.CPlusPlus = true;
+ g_options.CPlusPlus11 = true;
+ g_options.CPlusPlus14 = true;
+ g_options.CPlusPlus17 = true;
+ });
+ return g_options;
+}
+
+static const llvm::StringMap<tok::TokenKind> &GetKeywordsMap() {
+ static llvm::StringMap<tok::TokenKind> g_map{
+#define KEYWORD(Name, Flags) {llvm::StringRef(#Name), tok::kw_##Name},
+#include "clang/Basic/TokenKinds.def"
+#undef KEYWORD
+ };
+ return g_map;
+}
+
+void CPlusPlusNameParser::ExtractTokens() {
+ if (m_text.empty())
+ return;
+ clang::Lexer lexer(clang::SourceLocation(), GetLangOptions(), m_text.data(),
+ m_text.data(), m_text.data() + m_text.size());
+ const auto &kw_map = GetKeywordsMap();
+ clang::Token token;
+ for (lexer.LexFromRawLexer(token); !token.is(clang::tok::eof);
+ lexer.LexFromRawLexer(token)) {
+ if (token.is(clang::tok::raw_identifier)) {
+ auto it = kw_map.find(token.getRawIdentifier());
+ if (it != kw_map.end()) {
+ token.setKind(it->getValue());
+ }
+ }
+
+ m_tokens.push_back(token);
+ }
+}