375 lines
13 KiB
C++
375 lines
13 KiB
C++
// Copyright (C) 2017 Jonathan Müller <jonathanmueller.dev@gmail.com>
|
|
// This file is subject to the license terms in the LICENSE file
|
|
// found in the top-level directory of this distribution.
|
|
|
|
#include "tokenizer.hpp"
|
|
|
|
#include "libclang_visitor.hpp"
|
|
#include "parse_error.hpp"
|
|
|
|
using namespace cppast;
|
|
|
|
detail::token::token(const CXTranslationUnit& tu_unit, const CXToken& token)
|
|
: value_(clang_getTokenSpelling(tu_unit, token)), kind_(clang_getTokenKind(token))
|
|
{
|
|
}
|
|
|
|
namespace
|
|
{
|
|
bool cursor_is_function(CXCursorKind kind)
|
|
{
|
|
return kind == CXCursor_FunctionDecl || kind == CXCursor_CXXMethod
|
|
|| kind == CXCursor_Constructor || kind == CXCursor_Destructor
|
|
|| kind == CXCursor_ConversionFunction;
|
|
}
|
|
|
|
CXSourceLocation get_next_location(const CXTranslationUnit& tu, CXFile file,
|
|
const CXSourceLocation& loc, int inc = 1)
|
|
{
|
|
unsigned offset;
|
|
clang_getSpellingLocation(loc, nullptr, nullptr, nullptr, &offset);
|
|
return clang_getLocationForOffset(tu, file, offset + inc);
|
|
}
|
|
|
|
class simple_tokenizer
|
|
{
|
|
public:
|
|
explicit simple_tokenizer(const CXTranslationUnit& tu, const CXSourceRange& range,
|
|
const CXCursor& cur)
|
|
: tu_(tu)
|
|
{
|
|
clang_tokenize(tu, range, &tokens_, &no_);
|
|
DEBUG_ASSERT(no_ >= 1u, detail::parse_error_handler{}, cur, "no tokens available");
|
|
}
|
|
|
|
~simple_tokenizer()
|
|
{
|
|
clang_disposeTokens(tu_, tokens_, no_);
|
|
}
|
|
|
|
simple_tokenizer(const simple_tokenizer&) = delete;
|
|
simple_tokenizer& operator=(const simple_tokenizer&) = delete;
|
|
|
|
unsigned size() const noexcept
|
|
{
|
|
return no_;
|
|
}
|
|
|
|
const CXToken& operator[](unsigned i) const noexcept
|
|
{
|
|
return tokens_[i];
|
|
}
|
|
|
|
private:
|
|
CXTranslationUnit tu_;
|
|
CXToken* tokens_;
|
|
unsigned no_;
|
|
};
|
|
|
|
bool token_after_is(const CXTranslationUnit& tu, const CXFile& file, const CXCursor& cur,
|
|
const CXSourceLocation& loc, const char* token_str)
|
|
{
|
|
auto loc_after = get_next_location(tu, file, loc);
|
|
|
|
simple_tokenizer tokenizer(tu, clang_getRange(loc, loc_after), cur);
|
|
detail::cxstring spelling(clang_getTokenSpelling(tu, tokenizer[0u]));
|
|
return spelling == token_str;
|
|
}
|
|
|
|
// clang_getCursorExtent() is somehow broken in various ways
|
|
// this function returns the actual CXSourceRange that covers all parts required for parsing
|
|
// might include more tokens
|
|
// this function is the reason you shouldn't use libclang
|
|
CXSourceRange get_extent(const CXTranslationUnit& tu, const CXFile& file, const CXCursor& cur,
|
|
bool& unmunch)
|
|
{
|
|
unmunch = false;
|
|
|
|
auto extent = clang_getCursorExtent(cur);
|
|
auto begin = clang_getRangeStart(extent);
|
|
auto end = clang_getRangeEnd(extent);
|
|
|
|
auto kind = clang_getCursorKind(cur);
|
|
if (cursor_is_function(kind) || cursor_is_function(clang_getTemplateCursorKind(cur)))
|
|
{
|
|
auto range_shrunk = false;
|
|
|
|
// if a function we need to remove the body
|
|
// it does not need to be parsed
|
|
detail::visit_children(cur, [&](const CXCursor& child) {
|
|
if (clang_getCursorKind(child) == CXCursor_CompoundStmt
|
|
|| clang_getCursorKind(child) == CXCursor_CXXTryStmt
|
|
|| clang_getCursorKind(child) == CXCursor_InitListExpr)
|
|
{
|
|
auto child_extent = clang_getCursorExtent(child);
|
|
end = clang_getRangeStart(child_extent);
|
|
range_shrunk = true;
|
|
}
|
|
});
|
|
|
|
if (!range_shrunk && !token_after_is(tu, file, cur, end, ";"))
|
|
{
|
|
// we do not have a body, but it is not a declaration either
|
|
do
|
|
{
|
|
end = get_next_location(tu, file, end);
|
|
} while (!token_after_is(tu, file, cur, end, ";"));
|
|
}
|
|
else if (kind == CXCursor_CXXMethod)
|
|
// necessary for some reason
|
|
begin = get_next_location(tu, file, begin, -1);
|
|
}
|
|
else if (kind == CXCursor_TemplateTypeParameter && token_after_is(tu, file, cur, end, "("))
|
|
{
|
|
// if you have decltype as default argument for a type template parameter
|
|
// libclang doesn't include the parameters
|
|
auto next = get_next_location(tu, file, end);
|
|
auto prev = end;
|
|
for (auto paren_count = 1; paren_count != 0; next = get_next_location(tu, file, next))
|
|
{
|
|
if (token_after_is(tu, file, cur, next, "("))
|
|
++paren_count;
|
|
else if (token_after_is(tu, file, cur, next, ")"))
|
|
--paren_count;
|
|
prev = next;
|
|
}
|
|
#if CINDEX_VERSION_MINOR < 37
|
|
end = prev;
|
|
#else
|
|
end = next;
|
|
#endif
|
|
}
|
|
else if (kind == CXCursor_TemplateTemplateParameter
|
|
&& token_after_is(tu, file, cur, end, "<"))
|
|
{
|
|
// if you have a template template parameter in a template template parameter,
|
|
// the tokens are all messed up, only contain the `template`
|
|
|
|
// first: skip to closing angle bracket
|
|
// luckily no need to handle expressions here
|
|
auto next = get_next_location(tu, file, end, 2);
|
|
for (auto angle_count = 1; angle_count != 0; next = get_next_location(tu, file, next))
|
|
{
|
|
if (token_after_is(tu, file, cur, next, ">"))
|
|
--angle_count;
|
|
else if (token_after_is(tu, file, cur, next, ">>"))
|
|
angle_count -= 2;
|
|
else if (token_after_is(tu, file, cur, next, "<"))
|
|
++angle_count;
|
|
}
|
|
|
|
// second: skip until end of parameter
|
|
// no need to handle default, so look for '>' or ','
|
|
while (!token_after_is(tu, file, cur, next, ">")
|
|
&& !token_after_is(tu, file, cur, next, ","))
|
|
next = get_next_location(tu, file, next);
|
|
// now we found the proper end of the token
|
|
end = get_next_location(tu, file, next, -1);
|
|
}
|
|
else if ((kind == CXCursor_TemplateTypeParameter
|
|
|| kind == CXCursor_NonTypeTemplateParameter
|
|
|| kind == CXCursor_TemplateTemplateParameter)
|
|
&& !token_after_is(tu, file, cur, end, ">")
|
|
&& !token_after_is(tu, file, cur, end, ","))
|
|
{
|
|
DEBUG_ASSERT(token_after_is(tu, file, cur, get_next_location(tu, file, end, -2), ">>"),
|
|
detail::assert_handler{});
|
|
unmunch = true;
|
|
// need to shrink range anyway
|
|
end = get_next_location(tu, file, end, -1);
|
|
}
|
|
else if (kind == CXCursor_EnumDecl && !token_after_is(tu, file, cur, end, ";"))
|
|
{
|
|
while (!token_after_is(tu, file, cur, end, ";"))
|
|
end = get_next_location(tu, file, end);
|
|
}
|
|
else if (clang_isExpression(kind)
|
|
#if CINDEX_VERSION_MINOR < 37
|
|
|| kind == CXCursor_CXXBaseSpecifier || kind == CXCursor_TemplateTypeParameter
|
|
#endif
|
|
|| kind == CXCursor_FieldDecl || kind == CXCursor_ParmDecl
|
|
|| kind == CXCursor_NonTypeTemplateParameter
|
|
|| kind == CXCursor_TemplateTemplateParameter)
|
|
// need to shrink range by one
|
|
end = get_next_location(tu, file, end, -1);
|
|
|
|
return clang_getRange(begin, end);
|
|
}
|
|
}
|
|
|
|
detail::tokenizer::tokenizer(const CXTranslationUnit& tu, const CXFile& file, const CXCursor& cur)
|
|
{
|
|
auto extent = get_extent(tu, file, cur, unmunch_);
|
|
|
|
simple_tokenizer tokenizer(tu, extent, cur);
|
|
tokens_.reserve(tokenizer.size());
|
|
for (auto i = 0u; i != tokenizer.size(); ++i)
|
|
tokens_.emplace_back(tu, tokenizer[i]);
|
|
}
|
|
|
|
void detail::skip(detail::token_stream& stream, const char* str)
|
|
{
|
|
if (*str)
|
|
{
|
|
// non-empty string
|
|
DEBUG_ASSERT(!stream.done(), parse_error_handler{}, stream.cursor(),
|
|
format("expected '", str, "', got exhausted stream"));
|
|
auto& token = stream.peek();
|
|
DEBUG_ASSERT(token == str, parse_error_handler{}, stream.cursor(),
|
|
format("expected '", str, "', got '", token.c_str(), "'"));
|
|
stream.bump();
|
|
}
|
|
}
|
|
|
|
namespace
|
|
{
|
|
bool starts_with(const char*& str, const detail::token& t)
|
|
{
|
|
if (std::strncmp(str, t.c_str(), t.value().length()) != 0)
|
|
return false;
|
|
str += t.value().length();
|
|
while (*str == ' ' || *str == '\t')
|
|
++str;
|
|
return true;
|
|
}
|
|
}
|
|
|
|
bool detail::skip_if(detail::token_stream& stream, const char* str, bool multi_token)
|
|
{
|
|
if (!*str)
|
|
return true;
|
|
else if (stream.done())
|
|
return false;
|
|
auto save = stream.cur();
|
|
do
|
|
{
|
|
auto& token = stream.peek();
|
|
if (!starts_with(str, token) || (!multi_token && *str != '\0'))
|
|
{
|
|
stream.set_cur(save);
|
|
return false;
|
|
}
|
|
stream.bump();
|
|
} while (multi_token && *str);
|
|
return true;
|
|
}
|
|
|
|
detail::token_iterator detail::find_closing_bracket(detail::token_stream stream)
|
|
{
|
|
auto template_bracket = false;
|
|
auto open_bracket = stream.peek().c_str();
|
|
const char* close_bracket = nullptr;
|
|
if (skip_if(stream, "("))
|
|
close_bracket = ")";
|
|
else if (skip_if(stream, "{"))
|
|
close_bracket = "}";
|
|
else if (skip_if(stream, "["))
|
|
close_bracket = "]";
|
|
else if (skip_if(stream, "<"))
|
|
{
|
|
close_bracket = ">";
|
|
template_bracket = true;
|
|
}
|
|
else
|
|
DEBUG_UNREACHABLE(parse_error_handler{}, stream.cursor(),
|
|
format("expected a bracket, got '", stream.peek().c_str(), "'"));
|
|
|
|
auto bracket_count = 1;
|
|
auto paren_count = 0; // internal nested parenthesis
|
|
while (bracket_count != 0)
|
|
{
|
|
auto& cur = stream.get().value();
|
|
if (paren_count == 0 && cur == open_bracket)
|
|
++bracket_count;
|
|
else if (paren_count == 0 && cur == close_bracket)
|
|
--bracket_count;
|
|
else if (paren_count == 0 && template_bracket && cur == ">>")
|
|
// maximal munch
|
|
bracket_count -= 2u;
|
|
else if (cur == "(" || cur == "{" || cur == "[")
|
|
++paren_count;
|
|
else if (cur == ")" || cur == "}" || cur == "]")
|
|
--paren_count;
|
|
}
|
|
stream.bump_back();
|
|
DEBUG_ASSERT(paren_count == 0 && stream.peek().value() == close_bracket, parse_error_handler{},
|
|
stream.cursor(), "find_closing_bracket() internal parse error");
|
|
return stream.cur();
|
|
}
|
|
|
|
void detail::skip_brackets(detail::token_stream& stream)
|
|
{
|
|
auto closing = find_closing_bracket(stream);
|
|
stream.set_cur(std::next(closing));
|
|
}
|
|
|
|
namespace
|
|
{
|
|
bool skip_attribute_impl(detail::token_stream& stream)
|
|
{
|
|
if (skip_if(stream, "[") && stream.peek() == "[")
|
|
{
|
|
// C++11 attribute
|
|
// [[<attribute>]]
|
|
// ^
|
|
skip_brackets(stream);
|
|
// [[<attribute>]]
|
|
// ^
|
|
skip(stream, "]");
|
|
return true;
|
|
}
|
|
else if (skip_if(stream, "__attribute__"))
|
|
{
|
|
// GCC/clang attributes
|
|
// __attribute__(<attribute>)
|
|
// ^
|
|
skip_brackets(stream);
|
|
return true;
|
|
}
|
|
else if (skip_if(stream, "__declspec"))
|
|
{
|
|
// MSVC declspec
|
|
// __declspec(<attribute>)
|
|
// ^
|
|
skip_brackets(stream);
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
}
|
|
|
|
bool detail::skip_attribute(detail::token_stream& stream)
|
|
{
|
|
auto any = false;
|
|
while (skip_attribute_impl(stream))
|
|
any = true;
|
|
return any;
|
|
}
|
|
|
|
namespace
|
|
{
|
|
bool is_identifier(char c)
|
|
{
|
|
return std::isalnum(c) || c == '_';
|
|
}
|
|
}
|
|
|
|
std::string detail::to_string(token_stream& stream, token_iterator end)
|
|
{
|
|
std::string result;
|
|
while (stream.cur() != end)
|
|
{
|
|
auto& token = stream.get();
|
|
if (!result.empty() && is_identifier(result.back()) && is_identifier(token.value()[0u]))
|
|
result += ' ';
|
|
result += token.c_str();
|
|
}
|
|
if (stream.unmunch())
|
|
{
|
|
DEBUG_ASSERT(!result.empty() && result.back() == '>', detail::assert_handler{});
|
|
result.pop_back();
|
|
DEBUG_ASSERT(!result.empty() && result.back() == '>', detail::assert_handler{});
|
|
}
|
|
return result;
|
|
}
|