Fix tokenization related bugs

2018-12-18 16:58:43 +01:00 · 2018-12-18 16:58:43 +01:00 · 758f26afce
commit 758f26afce
parent 21ed11b700 51c2fc6569
8 changed files with 365 additions and 158 deletions
--- a/src/libclang/cxtokenizer.cpp
+++ b/src/libclang/cxtokenizer.cpp
@ -9,6 +9,8 @@
 #include "libclang_visitor.hpp"
 #include "parse_error.hpp"

+#include <iostream> // TODO
+
 using namespace cppast;

 detail::cxtoken::cxtoken(const CXTranslationUnit& tu_unit, const CXToken& token)
@ -24,16 +26,36 @@ bool cursor_is_function(CXCursorKind kind)
           || kind == CXCursor_ConversionFunction;
 }

-CXSourceLocation get_next_location(const CXTranslationUnit& tu, CXFile file,
-                                   const CXSourceLocation& loc, int inc = 1)
+// Returns whether `kind` is CXCursor_VarDecl or CXCursor_FieldDecl,
+// i.e. the cursor kinds treated as "variables" by the extent calculation.
+bool cursor_is_var(CXCursorKind kind)
 {
-    unsigned offset;
-    clang_getSpellingLocation(loc, nullptr, nullptr, nullptr, &offset);
-    if (inc >= 0)
-        offset += unsigned(inc);
-    else
-        offset -= unsigned(-inc);
-    return clang_getLocationForOffset(tu, file, offset);
+    return kind == CXCursor_VarDecl || kind == CXCursor_FieldDecl;
+}
+
+// Returns whether `loc` is considered to be inside `range`.
+// Only line numbers are compared: the line of `loc` must be >= the line of the range start
+// and < the line of the range end, and `loc` must be in the same file as the range start.
+// NOTE(review): because columns/offsets are ignored, a location on the last line of the range
+// (which includes every single-line range) is reported as outside - confirm this is intended.
+bool is_in_range(const CXSourceLocation& loc, const CXSourceRange& range)
+{
+    auto begin = clang_getRangeStart(range);
+    auto end   = clang_getRangeEnd(range);
+
+    // f_end is retrieved but not used in the comparison below
+    CXFile   f_loc, f_begin, f_end;
+    unsigned l_loc, l_begin, l_end;
+    // second out parameter of clang_getSpellingLocation() is the line number
+    clang_getSpellingLocation(loc, &f_loc, &l_loc, nullptr, nullptr);
+    clang_getSpellingLocation(begin, &f_begin, &l_begin, nullptr, nullptr);
+    clang_getSpellingLocation(end, &f_end, &l_end, nullptr, nullptr);
+
+    return l_loc >= l_begin && l_loc < l_end && clang_File_isEqual(f_loc, f_begin);
+}
+
+// Heuristic: reports whether the declaration of the variable's type is located
+// inside the extent of the variable declaration itself, e.g. `struct foo {} f`.
+bool has_inline_type_definition(CXCursor var_decl)
+{
+    const CXCursor declaring_cursor = clang_getTypeDeclaration(clang_getCursorType(var_decl));
+
+    // without a declaration for the type there is nothing that could be inline
+    return !clang_Cursor_isNull(declaring_cursor)
+           && is_in_range(clang_getCursorLocation(declaring_cursor),
+                          clang_getCursorExtent(var_decl));
 }

 class simple_tokenizer
@ -59,238 +81,338 @@ public:

    const CXToken& operator[](unsigned i) const noexcept
    {
+        DEBUG_ASSERT(i < no_, detail::assert_handler{});
        return tokens_[i];
    }

+    // Concatenates the spellings of the tokens, beginning with the first one,
+    // and stops as soon as the result is at least `length` characters long
+    // or all tokens have been used.
+    // Not noexcept: building the std::string allocates and can therefore throw.
+    std::string get_spelling(std::size_t length) const
+    {
+        // might need multiple tokens, because [[, for example, is treated as two separate tokens
+
+        std::string result;
+        for (auto cur = 0u; cur < no_; ++cur)
+        {
+            auto cur_spelling = detail::cxstring(clang_getTokenSpelling(tu_, tokens_[cur]));
+            result += cur_spelling.c_str();
+            if (result.length() >= length)
+                break;
+        }
+        return result;
+    }
+
 private:
    CXTranslationUnit tu_;
    CXToken*          tokens_;
    unsigned          no_;
 };

-bool token_after_is(const CXTranslationUnit& tu, const CXFile& file, const CXSourceLocation& loc,
-                    const char* token_str, int inc)
+// Returns the location in `file` whose offset is the offset of `loc` shifted by `inc`
+// (added for non-negative `inc`, subtracted otherwise).
+// `loc` is asserted to be a location in the main file.
+// NOTE(review): the offset is unsigned, so stepping back further than the offset of `loc`
+// wraps around; presumably the resulting location is rejected by the callers - confirm.
+CXSourceLocation get_next_location_impl(const CXTranslationUnit& tu, CXFile file,
+                                        const CXSourceLocation& loc, int inc = 1)
 {
-    auto loc_after = get_next_location(tu, file, loc, inc);
+    DEBUG_ASSERT(clang_Location_isFromMainFile(loc), detail::assert_handler{});
+
+    unsigned offset;
+    clang_getSpellingLocation(loc, nullptr, nullptr, nullptr, &offset);
+    if (inc >= 0)
+        offset += unsigned(inc);
+    else
+        offset -= unsigned(-inc);
+    return clang_getLocationForOffset(tu, file, offset);
+}
+
+// Moves `loc` forward by the length of a token, i.e. by `token_length` offset units.
+CXSourceLocation get_next_location(const CXTranslationUnit& tu, const CXFile& file,
+                                   const CXSourceLocation& loc, std::size_t token_length)
+{
+    const auto forward_by = static_cast<int>(token_length);
+    return get_next_location_impl(tu, file, loc, forward_by);
+}
+
+// Returns the location of the first character of the token in front of `loc`,
+// under the assumption that this token is `token_length` characters long.
+// Walks backwards from `loc` one offset at a time until a location is reached at which
+// a token starts; returns a null location if the walk leaves the main file.
+CXSourceLocation get_prev_location(const CXTranslationUnit& tu, const CXFile& file,
+                                   const CXSourceLocation& loc, std::size_t token_length)
+{
+    for (auto inc = 1;; ++inc)
+    {
+        auto loc_before = get_next_location_impl(tu, file, loc, -inc);
+        DEBUG_ASSERT(!clang_equalLocations(loc_before, loc), detail::assert_handler{});
+
+        if (!clang_Location_isFromMainFile(loc_before))
+            // out of range
+            return clang_getNullLocation();
+
+        simple_tokenizer tokenizer(tu, clang_getRange(loc_before, loc));
+        if (tokenizer.size() == 0u)
+            // the range did not produce any token, so there is no first token to inspect;
+            // treat it like whitespace and keep moving backwards
+            continue;
+
+        auto token_location = clang_getTokenLocation(tu, tokenizer[0]);
+        if (clang_equalLocations(loc_before, token_location))
+        {
+            // actually found a new token and not just whitespace
+            // loc_before is now the last character of the new token
+            // need to move by token_length - 1 to get to the first character
+            return get_next_location_impl(tu, file, loc, -1 * (inc + int(token_length) - 1));
+        }
+    }
+}
+
+// Returns whether the source text starting at `loc` is spelled `token_str`.
+// The range from `loc` to `loc` advanced by strlen(token_str) is tokenized
+// and the concatenated token spellings are compared against `token_str`.
+// Returns false if the advanced location is not in the main file.
+bool token_at_is(const CXTranslationUnit& tu, const CXFile& file, const CXSourceLocation& loc,
+                 const char* token_str)
+{
+    auto length = std::strlen(token_str);
+
+    auto loc_after = get_next_location(tu, file, loc, length);
     if (!clang_Location_isFromMainFile(loc_after))
         return false;

-    simple_tokenizer tokenizer(tu, inc > 0 ? clang_getRange(loc, loc_after)
-                                           : clang_getRange(loc_after, loc));
-    if (tokenizer.size() == 0u)
+    simple_tokenizer tokenizer(tu, clang_getRange(loc, loc_after));
+    return tokenizer.get_spelling(length) == token_str;
+}
+
+// Checks whether the source text starting at `loc` is spelled `token_str`
+// and, if so, advances `loc` by strlen(token_str) and returns true.
+// On a mismatch, or if the advanced location is not in the main file,
+// `loc` is left unchanged and false is returned.
+bool consume_if_token_at_is(const CXTranslationUnit& tu, const CXFile& file, CXSourceLocation& loc,
+                            const char* token_str)
+{
+    auto length = std::strlen(token_str);
+
+    auto loc_after = get_next_location(tu, file, loc, length);
+    if (!clang_Location_isFromMainFile(loc_after))
         return false;

-    detail::cxstring spelling(clang_getTokenSpelling(tu, tokenizer[0u]));
-    return spelling == token_str;
+    simple_tokenizer tokenizer(tu, clang_getRange(loc, loc_after));
+    if (tokenizer.get_spelling(length) == token_str)
+    {
+        // only consume on a match
+        loc = loc_after;
+        return true;
+    }
+    else
+        return false;
 }

+// Reports whether the token that precedes `loc` is spelled `token_str`.
+// `loc` itself is not modified.
+bool token_before_is(const CXTranslationUnit& tu, const CXFile& file, const CXSourceLocation& loc,
+                     const char* token_str)
+{
+    const auto expected_length = std::strlen(token_str);
+    const auto token_begin     = get_prev_location(tu, file, loc, expected_length);
+
+    // only a location inside the main file can be the start of a matching token
+    if (clang_Location_isFromMainFile(token_begin))
+    {
+        simple_tokenizer tokenizer(tu, clang_getRange(token_begin, loc));
+        return tokenizer.get_spelling(expected_length) == token_str;
+    }
+
+    return false;
+}
+
+// Checks whether the token that precedes `loc` is spelled `token_str`;
+// on a match `loc` is moved back to the beginning of that token.
+// Returns whether the token matched, `loc` is untouched otherwise.
+bool consume_if_token_before_is(const CXTranslationUnit& tu, const CXFile& file,
+                                CXSourceLocation& loc, const char* token_str)
+{
+    const auto expected_length = std::strlen(token_str);
+    const auto token_begin     = get_prev_location(tu, file, loc, expected_length);
+    if (!clang_Location_isFromMainFile(token_begin))
+        return false;
+
+    simple_tokenizer tokenizer(tu, clang_getRange(token_begin, loc));
+    const bool       matches = tokenizer.get_spelling(expected_length) == token_str;
+    if (matches)
+        loc = token_begin;
+    return matches;
+}
+
+// The source range(s) that need to be tokenized for a cursor.
+// Two ranges allow a piece in the middle to be left out of the token stream.
+struct Extent
+{
+    // tokenized first
+    CXSourceRange first_part;
+    // tokenized after first_part; a null range if there is no second part
+    CXSourceRange second_part;
+};
+
 // clang_getCursorExtent() is somehow broken in various ways
 // this function returns the actual CXSourceRange that covers all parts required for parsing
 // might include more tokens
 // this function is the reason you shouldn't use libclang
-CXSourceRange get_extent(const CXTranslationUnit& tu, const CXFile& file, const CXCursor& cur,
-                         bool& unmunch)
+Extent get_extent(const CXTranslationUnit& tu, const CXFile& file, const CXCursor& cur)
 {
-    unmunch = false;
-
    auto extent = clang_getCursorExtent(cur);
    auto begin  = clang_getRangeStart(extent);
    auto end    = clang_getRangeEnd(extent);

    auto kind = clang_getCursorKind(cur);
+
+    // first need to extend the range to capture attributes that are before the declaration
    if (cursor_is_function(kind) || cursor_is_function(clang_getTemplateCursorKind(cur))
        || kind == CXCursor_VarDecl || kind == CXCursor_FieldDecl || kind == CXCursor_ParmDecl
        || kind == CXCursor_NonTypeTemplateParameter)
    {
-        if (token_after_is(tu, file, begin, "]", -2) && token_after_is(tu, file, begin, "]", -3))
+        while (token_before_is(tu, file, begin, "]]") || token_before_is(tu, file, begin, ")"))
        {
-            while (!token_after_is(tu, file, begin, "[", -1)
-                   && !token_after_is(tu, file, begin, "[", -2))
-                begin = get_next_location(tu, file, begin, -1);
-
-            begin = get_next_location(tu, file, begin, -3);
-            DEBUG_ASSERT(token_after_is(tu, file, begin, "[", 0)
-                             && token_after_is(tu, file, get_next_location(tu, file, begin), "[",
-                                               0),
-                         detail::parse_error_handler{}, cur,
-                         "error in pre-function attribute parsing");
-        }
-        else if (token_after_is(tu, file, begin, ")", -2))
-        {
-            // maybe alignas specifier
            auto save_begin = begin;
-
-            auto paren_count = 1;
-            begin            = get_next_location(tu, file, begin, -1);
-            for (auto last_begin = begin; paren_count != 0; last_begin = begin)
+            if (consume_if_token_before_is(tu, file, begin, "]]"))
            {
-                begin = get_next_location(tu, file, begin, -1);
-                if (token_after_is(tu, file, begin, "(", -1))
-                    --paren_count;
-                else if (token_after_is(tu, file, begin, ")", -1))
-                    ++paren_count;
-
-                DEBUG_ASSERT(!clang_equalLocations(last_begin, begin),
-                             detail::parse_error_handler{}, cur,
-                             "infinite loop in alignas parsing");
+                while (!consume_if_token_before_is(tu, file, begin, "[["))
+                    begin = get_prev_location(tu, file, begin, 1);
            }
-            begin = get_next_location(tu, file, begin, -(int(std::strlen("alignas")) + 1));
+            else if (consume_if_token_before_is(tu, file, begin, ")"))
+            {
+                // maybe alignas specifier

-            if (token_after_is(tu, file, begin, "alignas", 0))
-                begin = get_next_location(tu, file, begin, -1);
-            else
-                begin = save_begin;
+                auto paren_count = 1;
+                for (auto last_begin = begin; paren_count != 0; last_begin = begin)
+                {
+                    if (token_before_is(tu, file, begin, "("))
+                        --paren_count;
+                    else if (token_before_is(tu, file, begin, ")"))
+                        ++paren_count;
+
+                    begin = get_prev_location(tu, file, begin, 1);
+                    DEBUG_ASSERT(!clang_equalLocations(last_begin, begin),
+                                 detail::parse_error_handler{}, cur,
+                                 "infinite loop in alignas parsing");
+                }
+
+                if (!consume_if_token_before_is(tu, file, begin, "alignas"))
+                {
+                    // not alignas
+                    begin = save_begin;
+                    break;
+                }
+            }
        }
    }

    if (cursor_is_function(kind) || cursor_is_function(clang_getTemplateCursorKind(cur)))
    {
-        auto is_definition = false;
-        // if a function we need to remove the body
-        // it does not need to be parsed
-        detail::visit_children(cur, [&](const CXCursor& child) {
-            if (clang_getCursorKind(child) == CXCursor_CompoundStmt
-                || clang_getCursorKind(child) == CXCursor_CXXTryStmt
-                || clang_getCursorKind(child) == CXCursor_InitListExpr)
-            {
-                auto child_extent = clang_getCursorExtent(child);
-                end               = clang_getRangeStart(child_extent);
-                is_definition     = true;
-            }
-        });
-
-        if (!is_definition)
+        if (clang_CXXMethod_isDefaulted(cur) || !clang_isCursorDefinition(cur))
        {
-            // i have no idea why this is necessary
-            is_definition = token_after_is(tu, file, end, "{", 0)
-                            || token_after_is(tu, file, end, "try", 0)
-                            || token_after_is(tu, file, end, ":", 0);
-            if (is_definition)
-                // need to extend range here to include the token
-                end = get_next_location(tu, file, end);
+            // defaulted or declaration: extend until semicolon
+            while (!token_at_is(tu, file, end, ";"))
+                end = get_next_location(tu, file, end, 1);
        }
-
-        if (!is_definition && !token_after_is(tu, file, end, ";", 0))
+        else
        {
-            // we do not have a body, but it is not a declaration either
-            do
-            {
-                end = get_next_location(tu, file, end);
-            } while (!token_after_is(tu, file, end, ";", 0));
+            // definition: remove body, we don't care about that
+            auto has_children = false;
+            detail::visit_children(cur, [&](const CXCursor& child) {
+                if (has_children)
+                    return;
+                else if (clang_getCursorKind(child) == CXCursor_CompoundStmt
+                         || clang_getCursorKind(child) == CXCursor_CXXTryStmt
+                         || clang_getCursorKind(child) == CXCursor_InitListExpr)
+                {
+                    auto child_extent = clang_getCursorExtent(child);
+                    end               = clang_getRangeStart(child_extent);
+                    has_children      = true;
+                }
+            });
        }
-        else if (kind == CXCursor_CXXMethod)
-            // necessary for some reason
-            begin = get_next_location(tu, file, begin, -1);
-        else if (kind == CXCursor_Destructor && token_after_is(tu, file, end, ")", 0))
-            // necessary for some other reason
-            end = get_next_location(tu, file, end);
    }
-    else if (kind == CXCursor_TemplateTypeParameter && token_after_is(tu, file, end, "(", 0))
+    else if (cursor_is_var(kind) || cursor_is_var(clang_getTemplateCursorKind(cur)))
+    {
+        // need to extend until the semicolon
+        while (!token_at_is(tu, file, end, ";"))
+            end = get_next_location(tu, file, end, 1);
+
+        if (has_inline_type_definition(cur))
+        {
+            // the type is declared inline,
+            // remove the type definition from the range
+            auto type_cursor = clang_getTypeDeclaration(clang_getCursorType(cur));
+            auto type_extent = clang_getCursorExtent(type_cursor);
+
+            auto type_begin = clang_getRangeStart(type_extent);
+            auto type_end   = clang_getRangeEnd(type_extent);
+
+            return {clang_getRange(begin, type_begin), clang_getRange(type_end, end)};
+        }
+    }
+    else if (kind == CXCursor_TemplateTypeParameter && token_at_is(tu, file, end, "("))
    {
        // if you have decltype as default argument for a type template parameter
        // libclang doesn't include the parameters
-        auto next = get_next_location(tu, file, end);
+        auto next = get_next_location(tu, file, end, 1);
        auto prev = end;
-        for (auto paren_count = 1; paren_count != 0; next = get_next_location(tu, file, next))
+        for (auto paren_count = 1; paren_count != 0; next = get_next_location(tu, file, next, 1))
        {
-            if (token_after_is(tu, file, next, "(", 0))
+            if (token_at_is(tu, file, next, "("))
                ++paren_count;
-            else if (token_after_is(tu, file, next, ")", 0))
+            else if (token_at_is(tu, file, next, ")"))
                --paren_count;
            prev = next;
        }
        end = next;
    }
-    else if (kind == CXCursor_TemplateTemplateParameter && token_after_is(tu, file, end, "<", 0))
+    else if (kind == CXCursor_TemplateTemplateParameter && token_at_is(tu, file, end, "<"))
    {
        // if you have a template template parameter in a template template parameter,
        // the tokens are all messed up, only contain the `template`

        // first: skip to closing angle bracket
        // luckily no need to handle expressions here
-        auto next = get_next_location(tu, file, end, 2);
-        for (auto angle_count = 1; angle_count != 0; next = get_next_location(tu, file, next))
+        auto next = get_next_location(tu, file, end, 1);
+        for (auto angle_count = 1; angle_count != 0; next = get_next_location(tu, file, next, 1))
        {
-            if (token_after_is(tu, file, next, ">", 0))
+            if (token_at_is(tu, file, next, ">"))
                --angle_count;
-            else if (token_after_is(tu, file, next, ">>", 0))
+            else if (token_at_is(tu, file, next, ">>"))
                angle_count -= 2;
-            else if (token_after_is(tu, file, next, "<", 0))
+            else if (token_at_is(tu, file, next, "<"))
                ++angle_count;
        }

        // second: skip until end of parameter
        // no need to handle default, so look for '>' or ','
-        while (!token_after_is(tu, file, next, ">", 0) && !token_after_is(tu, file, next, ",", 0))
-            next = get_next_location(tu, file, next);
+        while (!token_at_is(tu, file, next, ">") && !token_at_is(tu, file, next, ","))
+            next = get_next_location(tu, file, next, 1);
        // now we found the proper end of the token
-        end = get_next_location(tu, file, next, -1);
+        end = get_prev_location(tu, file, next, 1);
    }
    else if ((kind == CXCursor_TemplateTypeParameter || kind == CXCursor_NonTypeTemplateParameter
-              || kind == CXCursor_TemplateTemplateParameter)
-             && token_after_is(tu, file, end, "...", 0))
+              || kind == CXCursor_TemplateTemplateParameter))
    {
        // variadic tokens in unnamed parameter not included
-        end = get_next_location(tu, file, end, 3);
-        if (token_after_is(tu, file, end, ".", 0))
-            // extra whitespace, so bump again
-            // this should all go away once I redid the whole token thing...
+        consume_if_token_at_is(tu, file, end, "...");
+    }
+    else if (kind == CXCursor_EnumDecl && !token_at_is(tu, file, end, ";"))
+    {
+        while (!token_at_is(tu, file, end, ";"))
            end = get_next_location(tu, file, end, 1);
-
-        DEBUG_ASSERT(token_after_is(tu, file, end, ">", 0) || token_after_is(tu, file, end, ",", 0),
-                     detail::parse_error_handler{}, cur,
-                     "unexpected token in variadic parameter workaround");
    }
-    else if ((kind == CXCursor_TemplateTypeParameter || kind == CXCursor_NonTypeTemplateParameter
-              || kind == CXCursor_TemplateTemplateParameter)
-             && !token_after_is(tu, file, end, ">", 0) && !token_after_is(tu, file, end, ",", 0))
-    {
-        DEBUG_ASSERT(token_after_is(tu, file, get_next_location(tu, file, end, -2), ">>", 0),
-                     detail::parse_error_handler{}, cur,
-                     "unexpected token in maximal munch workaround");
-        unmunch = true;
-        // need to shrink range anyway
-        end = get_next_location(tu, file, end, -1);
-    }
-    else if (kind == CXCursor_EnumDecl && !token_after_is(tu, file, end, ";", 0))
-    {
-        while (!token_after_is(tu, file, end, ";", 0))
-            end = get_next_location(tu, file, end);
-    }
-    else if (kind == CXCursor_EnumConstantDecl && !token_after_is(tu, file, end, ",", 0))
+    else if (kind == CXCursor_EnumConstantDecl && !token_at_is(tu, file, end, ","))
    {
        // need to support attributes
        // just give up and extend the range to the range of the entire enum...
        auto parent = clang_getCursorLexicalParent(cur);
        end         = clang_getRangeEnd(clang_getCursorExtent(parent));
    }
-    else if (kind == CXCursor_ParmDecl && !token_after_is(tu, file, end, "]", -1))
-        // need to shrink range by one
-        end = get_next_location(tu, file, end, -1);
-    else if (kind == CXCursor_FieldDecl || kind == CXCursor_NonTypeTemplateParameter
-             || kind == CXCursor_TemplateTemplateParameter)
-        // need to shrink range by one
-        end = get_next_location(tu, file, end, -1);
    else if (kind == CXCursor_UnexposedDecl)
    {
        // include semicolon, if necessary
-        if (token_after_is(tu, file, end, ";", 0))
-            end = get_next_location(tu, file, end);
+        if (token_at_is(tu, file, end, ";"))
+            end = get_next_location(tu, file, end, 1);
    }

-    return clang_getRange(begin, end);
+    return Extent{clang_getRange(begin, end), clang_getNullRange()};
 }
 } // namespace

+// Tokenizes the extent computed by get_extent() for `cur`:
+// first the tokens of the first part, then - if it is not a null range - those of the second part.
 detail::cxtokenizer::cxtokenizer(const CXTranslationUnit& tu, const CXFile& file,
                                  const CXCursor& cur)
+: unmunch_(false)
 {
-    auto extent = get_extent(tu, file, cur, unmunch_);
+    auto extent = get_extent(tu, file, cur);

-    simple_tokenizer tokenizer(tu, extent);
+    simple_tokenizer tokenizer(tu, extent.first_part);
     tokens_.reserve(tokenizer.size());
     for (auto i = 0u; i != tokenizer.size(); ++i)
         tokens_.emplace_back(tu, tokenizer[i]);
+
+    if (!clang_Range_isNull(extent.second_part))
+    {
+        // note: this tokenizer intentionally shadows the one of the first part
+        simple_tokenizer tokenizer(tu, extent.second_part);
+        tokens_.reserve(tokens_.size() + tokenizer.size());
+        for (auto i = 0u; i != tokenizer.size(); ++i)
+            tokens_.emplace_back(tu, tokenizer[i]);
+    }
 }

 void detail::skip(detail::cxtoken_stream& stream, const char* str)
--- a/src/libclang/preprocessor.cpp
+++ b/src/libclang/preprocessor.cpp
@ -437,10 +437,9 @@ clang_preprocess_result clang_preprocess_impl(const libclang_compile_config& c,
    auto         cmd = get_preprocess_command(c, full_path.c_str(), macro_path);
    tpl::Process process(cmd, "",
                         [&](const char* str, std::size_t n) {
-                             result.file.reserve(result.file.size() + n);
                             for (auto ptr = str; ptr != str + n; ++ptr)
                                 if (*ptr == '\t')
-                                     result.file += "  "; // convert to two spaces
+                                     result.file += ' '; // convert each tab to a single space
                                 else if (*ptr != '\r')
                                     result.file += *ptr;
                         },
--- a/test/cpp_attribute.cpp
+++ b/test/cpp_attribute.cpp
@ -42,6 +42,9 @@ alignas(type) int var;

 // keyword attributes
 [[const]] int k();
+
+// multiple attributes but separately
+[[a]] [[b]] [[c]] int l();
 )";

    auto file = parse({}, "cpp_attribute.cpp", code);
@ -122,9 +125,19 @@ alignas(type) int var;
                                       else if (e.name() == "k")
                                           check_attribute(attr, "const", type_safe::nullopt, false,
                                                           "", cpp_attribute_kind::unknown);
+                                       else if (e.name() == "l")
+                                       {
+                                           // must be REQUIRE: REQUIRE_NOTHROW only checks that the
+                                           // expression doesn't throw, not that the size is right,
+                                           // and the elements are indexed unconditionally below
+                                           REQUIRE(attributes.size() == 3u);
+                                           check_attribute(attributes[0], "a", type_safe::nullopt,
+                                                           false, "", cpp_attribute_kind::unknown);
+                                           check_attribute(attributes[1], "b", type_safe::nullopt,
+                                                           false, "", cpp_attribute_kind::unknown);
+                                           check_attribute(attributes[2], "c", type_safe::nullopt,
+                                                           false, "", cpp_attribute_kind::unknown);
+                                       }
                                   },
                                   false);
-    REQUIRE(count == 10);
+    REQUIRE(count == 11);

    count = test_visit<cpp_class>(*file,
                                  [&](const cpp_entity& e) {
@ -140,6 +153,7 @@ alignas(type) int var;
    count = test_visit<cpp_variable>(*file,
                                     [&](const cpp_entity& e) {
                                         auto& attributes = e.attributes();
+                                         INFO(e.name());
                                         REQUIRE(attributes.size() == 1u);
                                         auto& attr = attributes.front();
                                         check_attribute(attr, "alignas", type_safe::nullopt, false,
@ -215,6 +229,27 @@ using o [[o]] = int;

 template <typename T>
 using p [[p]] = T;
+
+// constructor
+struct [[q]] q
+{
+    [[q]] q();
+};
+
+struct [[r]] r
+{
+    [[r]]
+    r();
+};
+
+// type defined inline
+struct [[inline_type]] inline_type
+{
+    [[field]] int field;
+}
+[[s]] s;
+
+int t [[t]];
 )";

    auto file = parse({}, "cpp_attribute__matching.cpp", code);
@ -242,5 +277,5 @@ using p [[p]] = T;

        return true;
    });
-    REQUIRE(count == 36u);
+    REQUIRE(count == 44u);
 }
--- a/test/cpp_function.cpp
+++ b/test/cpp_function.cpp
@ -61,6 +61,9 @@ void ns::l()

 /// ns::m m();
 ns::m m();
+
+/// void n(int i=int());
+void n(int i = int());
 )";

    auto check_body = [](const cpp_function& func, cpp_function_body_kind kind) {
@ -72,7 +75,7 @@ ns::m m();
    cpp_entity_index idx;
    auto             file  = parse(idx, "cpp_function.cpp", code);
    auto             count = test_visit<cpp_function>(*file, [&](const cpp_function& func) {
-        if (func.name() == "a" || func.name() == "b" || func.name() == "c")
+        if (func.name() == "a" || func.name() == "b" || func.name() == "c" || func.name() == "n")
        {
            REQUIRE(!func.noexcept_condition());
            REQUIRE(func.storage_class() == cpp_storage_class_none);
@ -150,6 +153,31 @@ ns::m m();
                REQUIRE(count == 1u);
                REQUIRE(func.is_variadic());
            }
+            else if (func.name() == "n")
+            {
+                REQUIRE(equal_types(idx, func.return_type(), *cpp_builtin_type::build(cpp_void)));
+                REQUIRE(func.signature() == "(int)");
+
+                auto count = 0u;
+                for (auto& param : func.parameters())
+                {
+                    if (param.name() == "i")
+                    {
+                        REQUIRE(equal_types(idx, param.type(), *cpp_builtin_type::build(cpp_int)));
+                        REQUIRE(param.default_value());
+                        REQUIRE(equal_expressions(param.default_value().value(),
+                                                  *cpp_unexposed_expression::
+                                                      build(cpp_pointer_type::build(
+                                                                cpp_builtin_type::build(cpp_int)),
+                                                            cpp_token_string::tokenize("int()"))));
+                    }
+                    else
+                        REQUIRE(false);
+                    ++count;
+                }
+                REQUIRE(count == 1u);
+                REQUIRE(!func.is_variadic());
+            }
        }
        else if (func.name() == "d" || func.name() == "e" || func.name() == "f")
        {
@ -243,7 +271,7 @@ ns::m m();
        else
            REQUIRE(false);
    });
-    REQUIRE(count == 14u);
+    REQUIRE(count == 15u);
 }

 TEST_CASE("static cpp_function")
--- a/test/cpp_member_function.cpp
+++ b/test/cpp_member_function.cpp
@ -297,8 +297,10 @@ struct foo
 /// foo<T>::foo(int);
 template <typename T>
 foo<T>::foo(int) {}
+; // there's a bug on MSVC's libclang, we have to give it a semicolon
 )";
    }
+    INFO(is_template);

    cpp_entity_index idx;
    auto             file  = parse(idx, "cpp_constructor.cpp", code);
@ -384,6 +386,15 @@ struct d : c
 /// virtual d::~d();
 d::~d() {}

+struct e : c
+{
+    /// virtual ~e() override final;
+    ~e() final;
+};
+
+/// virtual e::~e()=default;
+e::~e() = default;
+
 )";

    auto file  = parse({}, "cpp_destructor.cpp", code);
@ -425,8 +436,21 @@ d::~d() {}
                REQUIRE(dtor.virtual_info().value() == cpp_virtual_flags::override);
            REQUIRE(!dtor.noexcept_condition());
        }
+        else if (dtor.name() == "~e")
+        {
+            REQUIRE(dtor.virtual_info());
+            if (dtor.is_declaration())
+                REQUIRE(dtor.virtual_info().value()
+                        == (cpp_virtual_flags::override | cpp_virtual_flags::final));
+            else
+            {
+                REQUIRE(dtor.virtual_info().value() == cpp_virtual_flags::override);
+                REQUIRE(dtor.body_kind() == cpp_function_defaulted);
+            }
+            REQUIRE(!dtor.noexcept_condition());
+        }
        else
            REQUIRE(false);
    });
-    REQUIRE(count == 5u);
+    REQUIRE(count == 7u);
 }
--- a/test/cpp_preprocessor.cpp
+++ b/test/cpp_preprocessor.cpp
@ -93,8 +93,7 @@ namespace ns2
    }
 }

-// requires clang 4.0
-TEST_CASE("cpp_include_directive", "[!hide][clang4]")
+TEST_CASE("cpp_include_directive")
 {
    write_file("cpp_include_directive-header.hpp", R"(
 #define FOO a\
--- a/test/preprocessor.cpp
+++ b/test/preprocessor.cpp
@ -57,14 +57,10 @@ TEST_CASE("preprocessing use external macro")
    }

    auto file = parse({}, "preprocessing_external_macro.cpp", R"(
-#include <cmath>
-#ifdef _GLIBCXX_RELEASE
+#include <climits>

-// this requires libstdc++
-/// auto result=(__builtin_nanf(""));
-auto result = NAN;
-
-#endif
+/// auto result=8;
+auto result = CHAR_BIT;
 )",
                      fast_preprocessing);

--- a/tool/main.cpp
+++ b/tool/main.cpp
@ -40,6 +40,10 @@ void print_entity(std::ostream& out, const cppast::cpp_entity& e)
    if (cppast::is_definition(e))
        out << " [definition]";

+    // print number of attributes
+    if (!e.attributes().empty())
+        out << " [" << e.attributes().size() << " attribute(s)]";
+
    if (e.kind() == cppast::cpp_entity_kind::language_linkage_t)
        // no need to print additional information for language linkages
        out << '\n';