Add tokenizer
This commit is contained in:
parent
958100d3e3
commit
1d083978ca
5 changed files with 282 additions and 11 deletions
|
|
@ -5,12 +5,30 @@
|
|||
#include "debug_helper.hpp"
|
||||
|
||||
#include <cstdio>
|
||||
#include <clang-c/Index.h>
|
||||
#include <mutex>
|
||||
|
||||
#include "tokenizer.hpp"
|
||||
|
||||
using namespace cppast;
|
||||
|
||||
namespace
|
||||
{
|
||||
// locked by print_cursor_info() and print_tokens() for the duration of their output
std::mutex mtx;
|
||||
}
|
||||
|
||||
void detail::print_cursor_info(const CXCursor& cur) noexcept
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(mtx);
|
||||
std::printf("[debug] cursor '%s' (%s)\n", cxstring(clang_getCursorDisplayName(cur)).c_str(),
|
||||
cxstring(clang_getCursorKindSpelling(cur.kind)).c_str());
|
||||
}
|
||||
|
||||
// Builds a detail::tokenizer for `cur` and prints every token spelling to stdout,
// each followed by a single space. The file-local mutex is held while printing.
void detail::print_tokens(const detail::cxtranslation_unit& tu, const CXFile& file,
                          const CXCursor& cur) noexcept
{
    std::lock_guard<std::mutex> guard(mtx);

    detail::tokenizer cursor_tokens(tu, file, cur);
    for (auto iter = cursor_tokens.begin(); iter != cursor_tokens.end(); ++iter)
        std::printf("%s ", iter->c_str());

    // puts() appends its own newline, so this ends the line and adds a blank one
    std::puts("\n");
}
|
||||
|
|
|
|||
|
|
@ -12,6 +12,9 @@ namespace cppast
|
|||
namespace detail
|
||||
{
|
||||
// Debugging aid: prints the display name and kind spelling of `cur` to stdout.
void print_cursor_info(const CXCursor& cur) noexcept;
|
||||
|
||||
// Debugging aid: tokenizes `cur` with detail::tokenizer and prints the token
// spellings to stdout, separated by spaces.
void print_tokens(const cxtranslation_unit& tu, const CXFile& file,
|
||||
const CXCursor& cur) noexcept;
|
||||
}
|
||||
} // namespace cppast::detail
|
||||
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@
|
|||
#include <utility>
|
||||
|
||||
#include <clang-c/Index.h>
|
||||
#include <type_safe/optional.hpp>
|
||||
|
||||
#include <cppast/detail/assert.hpp>
|
||||
|
||||
|
|
@ -89,22 +90,33 @@ namespace cppast
|
|||
class cxstring
|
||||
{
|
||||
public:
|
||||
explicit cxstring(CXString str) noexcept
|
||||
: str_(str), c_str_(clang_getCString(str)), length_(std::strlen(c_str_))
|
||||
explicit cxstring(CXString str) noexcept : str_(string(str))
|
||||
{
|
||||
}
|
||||
|
||||
cxstring(const cxstring&) = delete;
|
||||
cxstring& operator=(const cxstring&) = delete;
|
||||
// Move constructor: copies the optional string state out of `source` and then
// clears `source`, so the moved-from object no longer holds the CXString.
cxstring(cxstring&& source) noexcept : str_(source.str_)
{
    source.str_.reset();
}
|
||||
|
||||
// Move assignment: disposes the string currently held (if any), takes over the
// state of `other` and leaves `other` empty.
// Assigning an object to itself is a no-op; without the identity check the held
// string would be disposed and the object left empty.
cxstring& operator=(cxstring&& other) noexcept
{
    if (this != &other)
    {
        if (str_)
            clang_disposeString(str_.value().str);
        str_ = other.str_;
        other.str_.reset();
    }
    return *this;
}
|
||||
|
||||
~cxstring() noexcept
|
||||
{
|
||||
clang_disposeString(str_);
|
||||
if (str_)
|
||||
clang_disposeString(str_.value().str);
|
||||
}
|
||||
|
||||
const char* c_str() const noexcept
|
||||
{
|
||||
return c_str_;
|
||||
return str_ ? str_.value().c_str : "";
|
||||
}
|
||||
|
||||
char operator[](std::size_t i) const noexcept
|
||||
|
|
@ -114,13 +126,22 @@ namespace cppast
|
|||
|
||||
std::size_t length() const noexcept
|
||||
{
|
||||
return length_;
|
||||
return str_ ? str_.value().length : 0u;
|
||||
}
|
||||
|
||||
private:
|
||||
CXString str_;
|
||||
const char* c_str_;
|
||||
std::size_t length_;
|
||||
struct string
|
||||
{
|
||||
CXString str;
|
||||
const char* c_str;
|
||||
std::size_t length;
|
||||
|
||||
explicit string(CXString str)
|
||||
: str(std::move(str)), c_str(clang_getCString(str)), length(std::strlen(c_str))
|
||||
{
|
||||
}
|
||||
};
|
||||
type_safe::optional<string> str_;
|
||||
};
|
||||
|
||||
inline bool operator==(const cxstring& a, const cxstring& b) noexcept
|
||||
|
|
|
|||
166
src/libclang/tokenizer.cpp
Normal file
166
src/libclang/tokenizer.cpp
Normal file
|
|
@ -0,0 +1,166 @@
|
|||
// Copyright (C) 2017 Jonathan Müller <jonathanmueller.dev@gmail.com>
|
||||
// This file is subject to the license terms in the LICENSE file
|
||||
// found in the top-level directory of this distribution.
|
||||
|
||||
#include "tokenizer.hpp"
|
||||
|
||||
#include "libclang_visitor.hpp"
|
||||
|
||||
using namespace cppast;
|
||||
|
||||
// Captures the spelling (clang_getTokenSpelling) and the kind (clang_getTokenKind)
// of `token`, which belongs to the translation unit `tu_unit`.
detail::token::token(const detail::cxtranslation_unit& tu_unit, const CXToken& token)
|
||||
: value_(clang_getTokenSpelling(tu_unit.get(), token)), kind_(clang_getTokenKind(token))
|
||||
{
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
// Returns true if `kind` is one of the function-like cursor kinds: free function,
// member function, constructor, destructor or conversion function.
bool cursor_is_function(CXCursorKind kind)
{
    switch (kind)
    {
    case CXCursor_FunctionDecl:
    case CXCursor_CXXMethod:
    case CXCursor_Constructor:
    case CXCursor_Destructor:
    case CXCursor_ConversionFunction:
        return true;
    default:
        return false;
    }
}
|
||||
|
||||
// Returns the location in `file` whose offset is the spelling offset of `loc`
// shifted by `inc` (one position forward by default).
CXSourceLocation get_next_location(const CXTranslationUnit& tu, CXFile file,
                                   const CXSourceLocation& loc, int inc = 1)
{
    unsigned file_offset;
    clang_getSpellingLocation(loc, nullptr, nullptr, nullptr, &file_offset);

    // the sum is computed in unsigned arithmetic, so a negative `inc`
    // moves backwards through wrap-around
    const unsigned shifted_offset = file_offset + static_cast<unsigned>(inc);
    return clang_getLocationForOffset(tu, file, shifted_offset);
}
|
||||
|
||||
class simple_tokenizer
|
||||
{
|
||||
public:
|
||||
explicit simple_tokenizer(const CXTranslationUnit& tu, const CXSourceRange& range) : tu_(tu)
|
||||
{
|
||||
clang_tokenize(tu, range, &tokens_, &no_);
|
||||
DEBUG_ASSERT(no_ >= 1u, detail::assert_handler{});
|
||||
}
|
||||
|
||||
~simple_tokenizer()
|
||||
{
|
||||
clang_disposeTokens(tu_, tokens_, no_);
|
||||
}
|
||||
|
||||
simple_tokenizer(const simple_tokenizer&) = delete;
|
||||
simple_tokenizer& operator=(const simple_tokenizer&) = delete;
|
||||
|
||||
unsigned size() const noexcept
|
||||
{
|
||||
return no_;
|
||||
}
|
||||
|
||||
const CXToken& operator[](unsigned i) const noexcept
|
||||
{
|
||||
return tokens_[i];
|
||||
}
|
||||
|
||||
private:
|
||||
CXTranslationUnit tu_;
|
||||
CXToken* tokens_;
|
||||
unsigned no_;
|
||||
};
|
||||
|
||||
bool token_after_is(const CXTranslationUnit& tu, const CXFile& file,
|
||||
const CXSourceLocation& loc, const char* token_str)
|
||||
{
|
||||
auto loc_after = get_next_location(tu, file, loc);
|
||||
|
||||
simple_tokenizer tokenizer(tu, clang_getRange(loc, loc_after));
|
||||
detail::cxstring spelling(clang_getTokenSpelling(tu, tokenizer[0u]));
|
||||
return spelling == token_str;
|
||||
}
|
||||
|
||||
// clang_getCursorExtent() is somehow broken in various ways
|
||||
// this function returns the actual CXSourceRange that covers all parts required for parsing
|
||||
// might include more tokens
|
||||
// this function is the reason you shouldn't use libclang
|
||||
CXSourceRange get_extent(const CXTranslationUnit& tu, const CXFile& file, const CXCursor& cur)
|
||||
{
|
||||
auto extent = clang_getCursorExtent(cur);
|
||||
auto begin = clang_getRangeStart(extent);
|
||||
auto end = clang_getRangeEnd(extent);
|
||||
|
||||
if (cursor_is_function(clang_getCursorKind(cur))
|
||||
|| cursor_is_function(clang_getTemplateCursorKind(cur)))
|
||||
{
|
||||
auto range_shrunk = false;
|
||||
|
||||
// if a function we need to remove the body
|
||||
// it does not need to be parsed
|
||||
detail::visit_children(cur, [&](const CXCursor& child) {
|
||||
if (clang_getCursorKind(child) == CXCursor_CompoundStmt
|
||||
|| clang_getCursorKind(child) == CXCursor_CXXTryStmt
|
||||
|| clang_getCursorKind(child) == CXCursor_InitListExpr)
|
||||
{
|
||||
auto child_extent = clang_getCursorExtent(child);
|
||||
end = clang_getRangeStart(child_extent);
|
||||
range_shrunk = true;
|
||||
return CXChildVisit_Break;
|
||||
}
|
||||
return CXChildVisit_Continue;
|
||||
});
|
||||
|
||||
if (!range_shrunk && !token_after_is(tu, file, end, ";"))
|
||||
{
|
||||
// we do not have a body, but it is not a declaration either
|
||||
do
|
||||
{
|
||||
end = get_next_location(tu, file, end);
|
||||
} while (!token_after_is(tu, file, end, ";"));
|
||||
}
|
||||
else if (clang_getCursorKind(cur) == CXCursor_CXXMethod)
|
||||
// necessary for some reason
|
||||
begin = get_next_location(tu, file, begin, -1);
|
||||
}
|
||||
else if (clang_getCursorKind(cur) == CXCursor_TemplateTypeParameter
|
||||
|| clang_getCursorKind(cur) == CXCursor_NonTypeTemplateParameter
|
||||
|| clang_getCursorKind(cur) == CXCursor_TemplateTemplateParameter
|
||||
|| clang_getCursorKind(cur) == CXCursor_ParmDecl)
|
||||
{
|
||||
if (clang_getCursorKind(cur) == CXCursor_TemplateTypeParameter
|
||||
&& token_after_is(tu, file, end, "("))
|
||||
{
|
||||
// if you have decltype as default argument for a type template parameter
|
||||
// libclang doesn't include the parameters
|
||||
auto next = get_next_location(tu, file, end);
|
||||
auto prev = end;
|
||||
for (auto paren_count = 1; paren_count != 0;
|
||||
next = get_next_location(tu, file, next))
|
||||
{
|
||||
if (token_after_is(tu, file, next, "("))
|
||||
++paren_count;
|
||||
else if (token_after_is(tu, file, next, ")"))
|
||||
--paren_count;
|
||||
prev = next;
|
||||
}
|
||||
end = prev;
|
||||
}
|
||||
}
|
||||
else if (clang_getCursorKind(cur) == CXCursor_TypeAliasDecl
|
||||
&& !token_after_is(tu, file, end, ";"))
|
||||
{
|
||||
// type alias tokens don't include everything
|
||||
do
|
||||
{
|
||||
end = get_next_location(tu, file, end);
|
||||
} while (!token_after_is(tu, file, end, ";"));
|
||||
end = get_next_location(tu, file, end);
|
||||
}
|
||||
|
||||
return clang_getRange(begin, end);
|
||||
}
|
||||
}
|
||||
|
||||
// Tokenizes the corrected extent of `cur` (see get_extent()) and stores one
// detail::token per libclang token, in order.
detail::tokenizer::tokenizer(const detail::cxtranslation_unit& tu, const CXFile& file,
                             const CXCursor& cur)
{
    simple_tokenizer raw_tokens(tu.get(), get_extent(tu.get(), file, cur));

    const auto count = raw_tokens.size();
    tokens_.reserve(count);
    for (unsigned idx = 0u; idx < count; ++idx)
        tokens_.emplace_back(tu, raw_tokens[idx]);
}
|
||||
63
src/libclang/tokenizer.hpp
Normal file
63
src/libclang/tokenizer.hpp
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
// Copyright (C) 2017 Jonathan Müller <jonathanmueller.dev@gmail.com>
|
||||
// This file is subject to the license terms in the LICENSE file
|
||||
// found in the top-level directory of this distribution.
|
||||
|
||||
#ifndef CPPAST_TOKENIZER_HPP_INCLUDED
|
||||
#define CPPAST_TOKENIZER_HPP_INCLUDED
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "raii_wrapper.hpp"
|
||||
|
||||
namespace cppast
|
||||
{
|
||||
namespace detail
|
||||
{
|
||||
class token
|
||||
{
|
||||
public:
|
||||
explicit token(const cxtranslation_unit& tu_unit, const CXToken& token);
|
||||
|
||||
const cxstring& value() const noexcept
|
||||
{
|
||||
return value_;
|
||||
}
|
||||
|
||||
const char* c_str() const noexcept
|
||||
{
|
||||
return value_.c_str();
|
||||
}
|
||||
|
||||
CXTokenKind kind() const noexcept
|
||||
{
|
||||
return kind_;
|
||||
}
|
||||
|
||||
private:
|
||||
cxstring value_;
|
||||
CXTokenKind kind_;
|
||||
};
|
||||
|
||||
class tokenizer
|
||||
{
|
||||
public:
|
||||
explicit tokenizer(const cxtranslation_unit& tu, const CXFile& file,
|
||||
const CXCursor& cur);
|
||||
|
||||
std::vector<token>::const_iterator begin() const noexcept
|
||||
{
|
||||
return tokens_.begin();
|
||||
}
|
||||
|
||||
std::vector<token>::const_iterator end() const noexcept
|
||||
{
|
||||
return tokens_.end();
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<token> tokens_;
|
||||
};
|
||||
}
|
||||
} // namespace cppast::detail
|
||||
|
||||
#endif // CPPAST_TOKENIZER_HPP_INCLUDED
|
||||
Loading…
Add table
Add a link
Reference in a new issue