From cfac41c7b53facfa70c4b2ae73128336e8bff905 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20M=C3=BCller?= Date: Thu, 12 Oct 2017 18:43:12 +0200 Subject: [PATCH 1/2] Rename tokenizer stuff to include cx prefix --- src/CMakeLists.txt | 4 +- src/libclang/class_parser.cpp | 8 +-- .../{tokenizer.cpp => cxtokenizer.cpp} | 27 +++---- .../{tokenizer.hpp => cxtokenizer.hpp} | 70 +++++++++---------- src/libclang/debug_helper.cpp | 4 +- src/libclang/enum_parser.cpp | 10 +-- src/libclang/expression_parser.cpp | 8 +-- src/libclang/function_parser.cpp | 40 +++++------ src/libclang/language_linkage_parser.cpp | 4 +- src/libclang/libclang_parser.cpp | 2 +- src/libclang/namespace_parser.cpp | 16 ++--- src/libclang/parse_functions.cpp | 8 +-- src/libclang/parse_functions.hpp | 10 +-- src/libclang/template_parser.cpp | 20 +++--- src/libclang/type_parser.cpp | 4 +- src/libclang/variable_parser.cpp | 6 +- 16 files changed, 121 insertions(+), 120 deletions(-) rename src/libclang/{tokenizer.cpp => cxtokenizer.cpp} (94%) rename src/libclang/{tokenizer.hpp => cxtokenizer.hpp} (65%) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 04e1714..1e0ccd6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -95,8 +95,8 @@ set(libclang_source libclang/preprocessor.hpp libclang/raii_wrapper.hpp libclang/template_parser.cpp - libclang/tokenizer.cpp - libclang/tokenizer.hpp + libclang/cxtokenizer.cpp + libclang/cxtokenizer.hpp libclang/type_parser.cpp libclang/variable_parser.cpp) diff --git a/src/libclang/class_parser.cpp b/src/libclang/class_parser.cpp index ea661ad..169a958 100644 --- a/src/libclang/class_parser.cpp +++ b/src/libclang/class_parser.cpp @@ -71,8 +71,8 @@ namespace auto access = convert_access(cur); auto is_virtual = clang_isVirtualBase(cur) != 0u; - detail::tokenizer tokenizer(context.tu, context.file, cur); - detail::token_stream stream(tokenizer, cur); + detail::cxtokenizer tokenizer(context.tu, context.file, cur); + detail::cxtoken_stream stream(tokenizer, cur); // [] [virtual] [] // can't use spelling to get the name @@ -108,8 +108,8 @@ std::unique_ptr detail::parse_cpp_class(const detail::parse_context& clang_getCursorLexicalParent(cur))) { // out-of-line definition - detail::tokenizer tokenizer(context.tu, context.file, cur); - detail::token_stream stream(tokenizer, cur); + detail::cxtokenizer tokenizer(context.tu, context.file, cur); + detail::cxtoken_stream stream(tokenizer, cur); std::string name = detail::get_cursor_name(cur).c_str(); auto pos = name.find('<'); diff --git a/src/libclang/tokenizer.cpp b/src/libclang/cxtokenizer.cpp similarity index 94% rename from src/libclang/tokenizer.cpp rename to src/libclang/cxtokenizer.cpp index f6895c5..51c3a87 100644 --- a/src/libclang/tokenizer.cpp +++ b/src/libclang/cxtokenizer.cpp @@ -2,7 +2,7 @@ // This file is subject to the license terms in the LICENSE file // found in the top-level directory of this distribution. -#include "tokenizer.hpp" +#include "cxtokenizer.hpp" #include @@ -11,7 +11,7 @@ using namespace cppast; -detail::token::token(const CXTranslationUnit& tu_unit, const CXToken& token) +detail::cxtoken::cxtoken(const CXTranslationUnit& tu_unit, const CXToken& token) : value_(clang_getTokenSpelling(tu_unit, token)), kind_(clang_getTokenKind(token)) { } @@ -237,7 +237,8 @@ namespace } } -detail::tokenizer::tokenizer(const CXTranslationUnit& tu, const CXFile& file, const CXCursor& cur) +detail::cxtokenizer::cxtokenizer(const CXTranslationUnit& tu, const CXFile& file, + const CXCursor& cur) { auto extent = get_extent(tu, file, cur, unmunch_); @@ -247,7 +248,7 @@ detail::tokenizer::tokenizer(const CXTranslationUnit& tu, const CXFile& file, co tokens_.emplace_back(tu, tokenizer[i]); } -void detail::skip(detail::token_stream& stream, const char* str) +void detail::skip(detail::cxtoken_stream& stream, const char* str) { if (*str) { @@ -263,7 +264,7 @@ void detail::skip(detail::token_stream& stream, const char* str) namespace { - bool starts_with(const char*& str, const detail::token& t) + bool starts_with(const char*& str, const detail::cxtoken& t) { if (std::strncmp(str, t.c_str(), t.value().length()) != 0) return false; @@ -274,7 +275,7 @@ namespace } } -bool detail::skip_if(detail::token_stream& stream, const char* str, bool multi_token) +bool detail::skip_if(detail::cxtoken_stream& stream, const char* str, bool multi_token) { if (!*str) return true; @@ -298,7 +299,7 @@ namespace { // whether or not the current angle bracket can be a comparison // note: this is a heuristic I hope works often enough - bool is_comparison(CXTokenKind last_kind, const detail::token& cur, CXTokenKind next_kind) + bool is_comparison(CXTokenKind last_kind, const detail::cxtoken& cur, CXTokenKind next_kind) { if (cur == "<") return last_kind == CXToken_Literal; @@ -308,7 +309,7 @@ namespace } } -detail::token_iterator detail::find_closing_bracket(detail::token_stream stream) +detail::cxtoken_iterator detail::find_closing_bracket(detail::cxtoken_stream stream) { auto template_bracket = false; auto open_bracket = stream.peek().c_str(); @@ -359,7 +360,7 @@ detail::token_iterator detail::find_closing_bracket(detail::token_stream stream) return stream.cur(); } -void detail::skip_brackets(detail::token_stream& stream) +void detail::skip_brackets(detail::cxtoken_stream& stream) { auto closing = find_closing_bracket(stream); stream.set_cur(std::next(closing)); @@ -367,7 +368,7 @@ void detail::skip_brackets(detail::token_stream& stream) namespace { - bool skip_attribute_impl(detail::token_stream& stream) + bool skip_attribute_impl(detail::cxtoken_stream& stream) { if (skip_if(stream, "[") && stream.peek() == "[") { @@ -401,7 +402,7 @@ namespace } } -bool detail::skip_attribute(detail::token_stream& stream) +bool detail::skip_attribute(detail::cxtoken_stream& stream) { auto any = false; while (skip_attribute_impl(stream)) @@ -432,7 +433,7 @@ namespace } } -cpp_token_string detail::to_string(token_stream& stream, token_iterator end) +cpp_token_string detail::to_string(cxtoken_stream& stream, cxtoken_iterator end) { cpp_token_string::builder builder; @@ -448,7 +449,7 @@ cpp_token_string detail::to_string(token_stream& stream, token_iterator end) return builder.finish(); } -bool detail::append_scope(detail::token_stream& stream, std::string& scope) +bool detail::append_scope(detail::cxtoken_stream& stream, std::string& scope) { // add identifiers and "::" to current scope name, // clear if there is any other token in between, or mismatched combination diff --git a/src/libclang/tokenizer.hpp b/src/libclang/cxtokenizer.hpp similarity index 65% rename from src/libclang/tokenizer.hpp rename to src/libclang/cxtokenizer.hpp index 78ac31b..bc311a2 100644 --- a/src/libclang/tokenizer.hpp +++ b/src/libclang/cxtokenizer.hpp @@ -2,8 +2,8 @@ // This file is subject to the license terms in the LICENSE file // found in the top-level directory of this distribution. -#ifndef CPPAST_TOKENIZER_HPP_INCLUDED -#define CPPAST_TOKENIZER_HPP_INCLUDED +#ifndef CPPAST_CXTOKENIZER_HPP_INCLUDED +#define CPPAST_CXTOKENIZER_HPP_INCLUDED #include #include @@ -16,10 +16,10 @@ namespace cppast { namespace detail { - class token + class cxtoken { public: - explicit token(const CXTranslationUnit& tu_unit, const CXToken& token); + explicit cxtoken(const CXTranslationUnit& tu_unit, const CXToken& token); const cxstring& value() const noexcept { @@ -41,40 +41,40 @@ namespace cppast CXTokenKind kind_; }; - inline bool operator==(const token& tok, const char* str) noexcept + inline bool operator==(const cxtoken& tok, const char* str) noexcept { return tok.value() == str; } - inline bool operator==(const char* str, const token& tok) noexcept + inline bool operator==(const char* str, const cxtoken& tok) noexcept { return str == tok.value(); } - inline bool operator!=(const token& tok, const char* str) noexcept + inline bool operator!=(const cxtoken& tok, const char* str) noexcept { return !(tok == str); } - inline bool operator!=(const char* str, const token& tok) noexcept + inline bool operator!=(const char* str, const cxtoken& tok) noexcept { return !(str == tok); } - using token_iterator = std::vector::const_iterator; + using cxtoken_iterator = std::vector::const_iterator; - class tokenizer + class cxtokenizer { public: - explicit tokenizer(const CXTranslationUnit& tu, const CXFile& file, - const CXCursor& cur); + explicit cxtokenizer(const CXTranslationUnit& tu, const CXFile& file, + const CXCursor& cur); - token_iterator begin() const noexcept + cxtoken_iterator begin() const noexcept { return tokens_.begin(); } - token_iterator end() const noexcept + cxtoken_iterator end() const noexcept { return tokens_.end(); } @@ -88,14 +88,14 @@ namespace cppast } private: - std::vector tokens_; - bool unmunch_; + std::vector tokens_; + bool unmunch_; }; - class token_stream + class cxtoken_stream { public: - explicit token_stream(const tokenizer& tokenizer, const CXCursor& cur) + explicit cxtoken_stream(const cxtokenizer& tokenizer, const CXCursor& cur) : cursor_(cur), begin_(tokenizer.begin()), cur_(begin_), @@ -104,7 +104,7 @@ namespace cppast { } - const token& peek() const noexcept + const cxtoken& peek() const noexcept { if (done()) return *std::prev(end_); @@ -123,7 +123,7 @@ namespace cppast --cur_; } - const token& get() noexcept + const cxtoken& get() noexcept { auto& result = peek(); bump(); @@ -140,22 +140,22 @@ namespace cppast return cursor_; } - token_iterator begin() const noexcept + cxtoken_iterator begin() const noexcept { return begin_; } - token_iterator cur() const noexcept + cxtoken_iterator cur() const noexcept { return cur_; } - token_iterator end() const noexcept + cxtoken_iterator end() const noexcept { return end_; } - void set_cur(token_iterator iter) noexcept + void set_cur(cxtoken_iterator iter) noexcept { cur_ = iter; } @@ -166,41 +166,41 @@ namespace cppast } private: - CXCursor cursor_; - token_iterator begin_, cur_, end_; - bool unmunch_; + CXCursor cursor_; + cxtoken_iterator begin_, cur_, end_; + bool unmunch_; }; // skips the next token // asserts that it has the given string - void skip(token_stream& stream, const char* str); + void skip(cxtoken_stream& stream, const char* str); // skips the next token if it has the given string // if multi_token == true, str can consist of multiple tokens optionally separated by whitespace - bool skip_if(token_stream& stream, const char* str, bool multi_token = false); + bool skip_if(cxtoken_stream& stream, const char* str, bool multi_token = false); // returns the location of the closing bracket // the current token must be (,[,{ or < // note: < might not work in the arguments of a template specialization - token_iterator find_closing_bracket(token_stream stream); + cxtoken_iterator find_closing_bracket(cxtoken_stream stream); // skips brackets // the current token must be (,[,{ or < // note: < might not work in the arguments of a template specialization - void skip_brackets(token_stream& stream); + void skip_brackets(cxtoken_stream& stream); // skips an attribute - bool skip_attribute(token_stream& stream); + bool skip_attribute(cxtoken_stream& stream); // converts a token range to a string - cpp_token_string to_string(token_stream& stream, token_iterator end); + cpp_token_string to_string(cxtoken_stream& stream, cxtoken_iterator end); // appends token to scope, if it is still valid // else clears it // note: does not consume the token if it is not valid, // returns false in that case - bool append_scope(token_stream& stream, std::string& scope); + bool append_scope(cxtoken_stream& stream, std::string& scope); } } // namespace cppast::detail -#endif // CPPAST_TOKENIZER_HPP_INCLUDED +#endif // CPPAST_CXTOKENIZER_HPP_INCLUDED diff --git a/src/libclang/debug_helper.cpp b/src/libclang/debug_helper.cpp index a3ab29a..ba4d38d 100644 --- a/src/libclang/debug_helper.cpp +++ b/src/libclang/debug_helper.cpp @@ -7,7 +7,7 @@ #include #include -#include "tokenizer.hpp" +#include "cxtokenizer.hpp" using namespace cppast; @@ -50,7 +50,7 @@ void detail::print_tokens(const CXTranslationUnit& tu, const CXFile& file, const CXCursor& cur) noexcept { std::lock_guard lock(mtx); - detail::tokenizer tokenizer(tu, file, cur); + detail::cxtokenizer tokenizer(tu, file, cur); for (auto& token : tokenizer) std::fprintf(stderr, "%s ", token.c_str()); std::fputs("\n", stderr); diff --git a/src/libclang/enum_parser.cpp b/src/libclang/enum_parser.cpp index 95eb886..9e7431f 100644 --- a/src/libclang/enum_parser.cpp +++ b/src/libclang/enum_parser.cpp @@ -20,8 +20,8 @@ namespace DEBUG_ASSERT(cur.kind == CXCursor_EnumConstantDecl, detail::parse_error_handler{}, cur, "unexpected child cursor of enum"); - detail::tokenizer tokenizer(context.tu, context.file, cur); - detail::token_stream stream(tokenizer, cur); + detail::cxtokenizer tokenizer(context.tu, context.file, cur); + detail::cxtoken_stream stream(tokenizer, cur); // [], // or: [] = , @@ -47,9 +47,9 @@ namespace cpp_enum::builder make_enum_builder(const detail::parse_context& context, const CXCursor& cur, type_safe::optional& semantic_parent) { - auto name = detail::get_cursor_name(cur); - detail::tokenizer tokenizer(context.tu, context.file, cur); - detail::token_stream stream(tokenizer, cur); + auto name = detail::get_cursor_name(cur); + detail::cxtokenizer tokenizer(context.tu, context.file, cur); + detail::cxtoken_stream stream(tokenizer, cur); // [] enum [class] [] name [: type] { detail::skip_attribute(stream); diff --git a/src/libclang/expression_parser.cpp b/src/libclang/expression_parser.cpp index 9950675..43079e0 100644 --- a/src/libclang/expression_parser.cpp +++ b/src/libclang/expression_parser.cpp @@ -14,8 +14,8 @@ std::unique_ptr detail::parse_expression(const detail::parse_con auto kind = clang_getCursorKind(cur); DEBUG_ASSERT(clang_isExpression(kind), detail::assert_handler{}); - detail::tokenizer tokenizer(context.tu, context.file, cur); - detail::token_stream stream(tokenizer, cur); + detail::cxtokenizer tokenizer(context.tu, context.file, cur); + detail::cxtoken_stream stream(tokenizer, cur); auto type = parse_type(context, cur, clang_getCursorType(cur)); auto expr = to_string(stream, stream.end()); @@ -36,8 +36,8 @@ std::unique_ptr detail::parse_expression(const detail::parse_con } std::unique_ptr detail::parse_raw_expression(const parse_context&, - token_stream& stream, - token_iterator end, + cxtoken_stream& stream, + cxtoken_iterator end, std::unique_ptr type) { if (stream.done()) diff --git a/src/libclang/function_parser.cpp b/src/libclang/function_parser.cpp index f13974d..5d20e9b 100644 --- a/src/libclang/function_parser.cpp +++ b/src/libclang/function_parser.cpp @@ -92,7 +92,7 @@ namespace } // precondition: after the name - void skip_parameters(detail::token_stream& stream) + void skip_parameters(detail::cxtoken_stream& stream) { if (stream.peek() == "<") // specialization arguments @@ -218,7 +218,7 @@ namespace bool is_friend = false; }; - bool prefix_end(detail::token_stream& stream, const char* name, bool is_ctor) + bool prefix_end(detail::cxtoken_stream& stream, const char* name, bool is_ctor) { auto cur = stream.cur(); // name can have multiple tokens if it is an operator @@ -262,7 +262,7 @@ namespace return true; } - prefix_info parse_prefix_info(detail::token_stream& stream, const char* name, bool is_ctor) + prefix_info parse_prefix_info(detail::cxtoken_stream& stream, const char* name, bool is_ctor) { prefix_info result; @@ -302,7 +302,7 @@ namespace } }; - cpp_cv parse_cv(detail::token_stream& stream) + cpp_cv parse_cv(detail::cxtoken_stream& stream) { if (detail::skip_if(stream, "const")) { @@ -322,7 +322,7 @@ namespace return cpp_cv_none; } - cpp_reference parse_ref(detail::token_stream& stream) + cpp_reference parse_ref(detail::cxtoken_stream& stream) { if (detail::skip_if(stream, "&")) return cpp_ref_lvalue; @@ -332,7 +332,7 @@ namespace return cpp_ref_none; } - std::unique_ptr parse_noexcept(detail::token_stream& stream, + std::unique_ptr parse_noexcept(detail::cxtoken_stream& stream, const detail::parse_context& context) { if (!detail::skip_if(stream, "noexcept")) @@ -351,7 +351,7 @@ namespace return expr; } - cpp_function_body_kind parse_body_kind(detail::token_stream& stream, bool& pure_virtual) + cpp_function_body_kind parse_body_kind(detail::cxtoken_stream& stream, bool& pure_virtual) { pure_virtual = false; if (detail::skip_if(stream, "default")) @@ -369,7 +369,7 @@ namespace return cpp_function_declaration; } - void parse_body(detail::token_stream& stream, suffix_info& result, bool allow_virtual) + void parse_body(detail::cxtoken_stream& stream, suffix_info& result, bool allow_virtual) { auto pure_virtual = false; result.body_kind = parse_body_kind(stream, pure_virtual); @@ -385,7 +385,7 @@ namespace } // precondition: we've skipped the function parameters - suffix_info parse_suffix_info(detail::token_stream& stream, + suffix_info parse_suffix_info(detail::cxtoken_stream& stream, const detail::parse_context& context, bool allow_qualifier, bool allow_virtual) { @@ -488,8 +488,8 @@ namespace { auto name = detail::get_cursor_name(cur); - detail::tokenizer tokenizer(context.tu, context.file, cur); - detail::token_stream stream(tokenizer, cur); + detail::cxtokenizer tokenizer(context.tu, context.file, cur); + detail::cxtoken_stream stream(tokenizer, cur); auto prefix = parse_prefix_info(stream, name.c_str(), false); DEBUG_ASSERT(!prefix.is_virtual && !prefix.is_explicit, detail::parse_error_handler{}, cur, @@ -611,7 +611,7 @@ namespace template std::unique_ptr handle_suffix(const detail::parse_context& context, const CXCursor& cur, Builder& builder, - detail::token_stream& stream, bool is_virtual, + detail::cxtoken_stream& stream, bool is_virtual, type_safe::optional semantic_parent) { auto allow_qualifiers = set_qualifier(0, builder, cpp_cv_none, cpp_ref_none); @@ -640,8 +640,8 @@ std::unique_ptr detail::parse_cpp_member_function(const detail::pars detail::assert_handler{}); auto name = detail::get_cursor_name(cur); - detail::tokenizer tokenizer(context.tu, context.file, cur); - detail::token_stream stream(tokenizer, cur); + detail::cxtokenizer tokenizer(context.tu, context.file, cur); + detail::cxtoken_stream stream(tokenizer, cur); auto prefix = parse_prefix_info(stream, name.c_str(), false); DEBUG_ASSERT(!prefix.is_explicit, detail::parse_error_handler{}, cur, @@ -670,8 +670,8 @@ std::unique_ptr detail::parse_cpp_conversion_op(const detail::parse_ || clang_getTemplateCursorKind(cur) == CXCursor_ConversionFunction, detail::assert_handler{}); - detail::tokenizer tokenizer(context.tu, context.file, cur); - detail::token_stream stream(tokenizer, cur); + detail::cxtokenizer tokenizer(context.tu, context.file, cur); + detail::cxtoken_stream stream(tokenizer, cur); auto prefix = parse_prefix_info(stream, "operator", false); // heuristic to find arguments tokens @@ -735,8 +735,8 @@ std::unique_ptr detail::parse_cpp_constructor(const detail::parse_co if (pos != std::string::npos) name.erase(pos); - detail::tokenizer tokenizer(context.tu, context.file, cur); - detail::token_stream stream(tokenizer, cur); + detail::cxtokenizer tokenizer(context.tu, context.file, cur); + detail::cxtoken_stream stream(tokenizer, cur); auto prefix = parse_prefix_info(stream, name.c_str(), true); DEBUG_ASSERT(!prefix.is_virtual, detail::parse_error_handler{}, cur, @@ -771,8 +771,8 @@ std::unique_ptr detail::parse_cpp_destructor(const detail::parse_con { DEBUG_ASSERT(clang_getCursorKind(cur) == CXCursor_Destructor, detail::assert_handler{}); - detail::tokenizer tokenizer(context.tu, context.file, cur); - detail::token_stream stream(tokenizer, cur); + detail::cxtokenizer tokenizer(context.tu, context.file, cur); + detail::cxtoken_stream stream(tokenizer, cur); auto prefix_info = parse_prefix_info(stream, "~", false); DEBUG_ASSERT(!prefix_info.is_constexpr && !prefix_info.is_explicit, detail::assert_handler{}); diff --git a/src/libclang/language_linkage_parser.cpp b/src/libclang/language_linkage_parser.cpp index d79f8cb..0477f21 100644 --- a/src/libclang/language_linkage_parser.cpp +++ b/src/libclang/language_linkage_parser.cpp @@ -17,8 +17,8 @@ std::unique_ptr detail::try_parse_cpp_language_linkage(const parse_c DEBUG_ASSERT(cur.kind == CXCursor_UnexposedDecl, detail::assert_handler{}); // not exposed currently - detail::tokenizer tokenizer(context.tu, context.file, cur); - detail::token_stream stream(tokenizer, cur); + detail::cxtokenizer tokenizer(context.tu, context.file, cur); + detail::cxtoken_stream stream(tokenizer, cur); // extern ... if (!detail::skip_if(stream, "extern")) diff --git a/src/libclang/libclang_parser.cpp b/src/libclang/libclang_parser.cpp index 12b602a..65ab58c 100644 --- a/src/libclang/libclang_parser.cpp +++ b/src/libclang/libclang_parser.cpp @@ -15,7 +15,7 @@ #include "parse_error.hpp" #include "parse_functions.hpp" #include "preprocessor.hpp" -#include "tokenizer.hpp" +#include "cxtokenizer.hpp" using namespace cppast; diff --git a/src/libclang/namespace_parser.cpp b/src/libclang/namespace_parser.cpp index f3b8797..1391565 100644 --- a/src/libclang/namespace_parser.cpp +++ b/src/libclang/namespace_parser.cpp @@ -16,8 +16,8 @@ namespace cpp_namespace::builder make_ns_builder(const detail::parse_context& context, const CXCursor& cur) { - detail::tokenizer tokenizer(context.tu, context.file, cur); - detail::token_stream stream(tokenizer, cur); + detail::cxtokenizer tokenizer(context.tu, context.file, cur); + detail::cxtoken_stream stream(tokenizer, cur); // [inline] namespace [] { auto is_inline = false; @@ -83,8 +83,8 @@ std::unique_ptr detail::parse_cpp_namespace_alias(const detail::pars { DEBUG_ASSERT(cur.kind == CXCursor_NamespaceAlias, detail::assert_handler{}); - detail::tokenizer tokenizer(context.tu, context.file, cur); - detail::token_stream stream(tokenizer, cur); + detail::cxtokenizer tokenizer(context.tu, context.file, cur); + detail::cxtoken_stream stream(tokenizer, cur); // namespace = ; detail::skip(stream, "namespace"); @@ -108,8 +108,8 @@ std::unique_ptr detail::parse_cpp_using_directive(const detail::pars { DEBUG_ASSERT(cur.kind == CXCursor_UsingDirective, detail::assert_handler{}); - detail::tokenizer tokenizer(context.tu, context.file, cur); - detail::token_stream stream(tokenizer, cur); + detail::cxtokenizer tokenizer(context.tu, context.file, cur); + detail::cxtoken_stream stream(tokenizer, cur); // using namespace ; detail::skip(stream, "using"); @@ -182,8 +182,8 @@ std::unique_ptr detail::parse_cpp_using_declaration( { DEBUG_ASSERT(cur.kind == CXCursor_UsingDeclaration, detail::assert_handler{}); - detail::tokenizer tokenizer(context.tu, context.file, cur); - detail::token_stream stream(tokenizer, cur); + detail::cxtokenizer tokenizer(context.tu, context.file, cur); + detail::cxtoken_stream stream(tokenizer, cur); // using ; detail::skip(stream, "using"); diff --git a/src/libclang/parse_functions.cpp b/src/libclang/parse_functions.cpp index 6b4b471..0b907a2 100644 --- a/src/libclang/parse_functions.cpp +++ b/src/libclang/parse_functions.cpp @@ -215,10 +215,10 @@ std::unique_ptr detail::parse_entity(const detail::parse_context& co detail::get_cursor_kind_spelling(cur).c_str(), "'")); // build unexposed entity - auto name = detail::get_cursor_name(cur); - detail::tokenizer tokenizer(context.tu, context.file, cur); - detail::token_stream stream(tokenizer, cur); - auto spelling = detail::to_string(stream, stream.end()); + auto name = detail::get_cursor_name(cur); + detail::cxtokenizer tokenizer(context.tu, context.file, cur); + detail::cxtoken_stream stream(tokenizer, cur); + auto spelling = detail::to_string(stream, stream.end()); std::unique_ptr entity; if (name.empty()) diff --git a/src/libclang/parse_functions.hpp b/src/libclang/parse_functions.hpp index 7ef94a1..b98d640 100644 --- a/src/libclang/parse_functions.hpp +++ b/src/libclang/parse_functions.hpp @@ -9,7 +9,7 @@ #include #include "raii_wrapper.hpp" -#include "tokenizer.hpp" // for convenience +#include "cxtokenizer.hpp" // for convenience #include "parse_error.hpp" // for convenience #include "preprocessor.hpp" @@ -76,8 +76,8 @@ namespace cppast // and ends at the given iterator // this is required for situations where there is no type exposed, // like default type of a template type parameter - std::unique_ptr parse_raw_type(const parse_context& context, token_stream& stream, - token_iterator end); + std::unique_ptr parse_raw_type(const parse_context& context, + cxtoken_stream& stream, cxtoken_iterator end); std::unique_ptr parse_expression(const parse_context& context, const CXCursor& cur); @@ -86,8 +86,8 @@ namespace cppast // this is required for situations where there is no expression cursor exposed, // like member initializers std::unique_ptr parse_raw_expression(const parse_context& context, - token_stream& stream, - token_iterator end, + cxtoken_stream& stream, + cxtoken_iterator end, std::unique_ptr type); // parse_entity() dispatches on the cursor type diff --git a/src/libclang/template_parser.cpp b/src/libclang/template_parser.cpp index b16bcee..de4ecef 100644 --- a/src/libclang/template_parser.cpp +++ b/src/libclang/template_parser.cpp @@ -50,9 +50,9 @@ namespace DEBUG_ASSERT(clang_getCursorKind(cur) == CXCursor_TemplateTypeParameter, detail::assert_handler{}); - detail::tokenizer tokenizer(context.tu, context.file, cur); - detail::token_stream stream(tokenizer, cur); - auto name = detail::get_cursor_name(cur); + detail::cxtokenizer tokenizer(context.tu, context.file, cur); + detail::cxtoken_stream stream(tokenizer, cur); + auto name = detail::get_cursor_name(cur); // syntax: typename/class [...] name [= ...] auto keyword = cpp_template_keyword::keyword_class; @@ -87,8 +87,8 @@ namespace auto type = clang_getCursorType(cur); auto def = detail::parse_default_value(context, cur, name.c_str()); - detail::tokenizer tokenizer(context.tu, context.file, cur); - detail::token_stream stream(tokenizer, cur); + detail::cxtokenizer tokenizer(context.tu, context.file, cur); + detail::cxtoken_stream stream(tokenizer, cur); // see if it is variadic // syntax a): some-tokens ... name some-tokens @@ -120,9 +120,9 @@ namespace DEBUG_ASSERT(clang_getCursorKind(cur) == CXCursor_TemplateTemplateParameter, detail::assert_handler{}); - detail::tokenizer tokenizer(context.tu, context.file, cur); - detail::token_stream stream(tokenizer, cur); - auto name = detail::get_cursor_name(cur); + detail::cxtokenizer tokenizer(context.tu, context.file, cur); + detail::cxtoken_stream stream(tokenizer, cur); + auto name = detail::get_cursor_name(cur); // syntax: template <…> class/typename [...] name [= …] detail::skip(stream, "template"); @@ -263,8 +263,8 @@ namespace template void parse_arguments(Builder& b, const detail::parse_context& context, const CXCursor& cur) { - detail::tokenizer tokenizer(context.tu, context.file, cur); - detail::token_stream stream(tokenizer, cur); + detail::cxtokenizer tokenizer(context.tu, context.file, cur); + detail::cxtoken_stream stream(tokenizer, cur); while (!stream.done() && !detail::skip_if(stream, detail::get_cursor_name(cur).c_str(), true)) diff --git a/src/libclang/type_parser.cpp b/src/libclang/type_parser.cpp index cdd841a..c850e8a 100644 --- a/src/libclang/type_parser.cpp +++ b/src/libclang/type_parser.cpp @@ -731,8 +731,8 @@ std::unique_ptr detail::parse_type(const detail::parse_context& contex } std::unique_ptr detail::parse_raw_type(const detail::parse_context&, - detail::token_stream& stream, - detail::token_iterator end) + detail::cxtoken_stream& stream, + detail::cxtoken_iterator end) { auto result = detail::to_string(stream, end); return cpp_unexposed_type::build(result.as_string()); diff --git a/src/libclang/variable_parser.cpp b/src/libclang/variable_parser.cpp index d4a65bf..f49e240 100644 --- a/src/libclang/variable_parser.cpp +++ b/src/libclang/variable_parser.cpp @@ -14,8 +14,8 @@ using namespace cppast; std::unique_ptr detail::parse_default_value(const detail::parse_context& context, const CXCursor& cur, const char* name) { - detail::tokenizer tokenizer(context.tu, context.file, cur); - detail::token_stream stream(tokenizer, cur); + detail::cxtokenizer tokenizer(context.tu, context.file, cur); + detail::cxtoken_stream stream(tokenizer, cur); auto has_default = false; auto got_name = *name == '\0'; @@ -57,7 +57,7 @@ std::unique_ptr detail::parse_cpp_variable(const detail::parse_conte // just look for thread local or constexpr // can't appear anywhere else, so good enough - detail::tokenizer tokenizer(context.tu, context.file, cur); + detail::cxtokenizer tokenizer(context.tu, context.file, cur); for (auto& token : tokenizer) if (token.value() == "thread_local") storage_class = From 15729206507f8c33bbb8e116fc17dc9012471721 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonathan=20M=C3=BCller?= Date: Thu, 19 Oct 2017 19:02:27 +0200 Subject: [PATCH 2/2] Add function to tokenize strings --- include/cppast/cpp_token.hpp | 26 +- src/code_generator.cpp | 22 +- src/cpp_token.cpp | 583 ++++++++++++++++++++++++++++++++ src/libclang/cxtokenizer.cpp | 22 +- src/libclang/type_parser.cpp | 4 +- test/CMakeLists.txt | 1 + test/cpp_function.cpp | 19 +- test/cpp_member_function.cpp | 11 +- test/cpp_member_variable.cpp | 2 +- test/cpp_static_assert.cpp | 18 +- test/cpp_template_parameter.cpp | 14 +- test/cpp_token.cpp | 129 +++++++ test/cpp_type_alias.cpp | 4 +- test/cpp_variable.cpp | 26 +- 14 files changed, 798 insertions(+), 83 deletions(-) create mode 100644 test/cpp_token.cpp diff --git a/include/cppast/cpp_token.hpp b/include/cppast/cpp_token.hpp index 1daf036..47d1073 100644 --- a/include/cppast/cpp_token.hpp +++ b/include/cppast/cpp_token.hpp @@ -15,12 +15,13 @@ namespace cppast /// The kinds of C++ tokens. enum class cpp_token_kind { - identifier, //< Any identifier. - keyword, //< Any keyword. - literal, //< Any literal. - punctuation, //< Any other punctuation. - - unknown, //< An unknown token. + identifier, //< Any identifier. + keyword, //< Any keyword. + int_literal, //< An integer literal. + float_literal, //< A floating point literal. + char_literal, //< A character literal. + string_literal, //< A string literal. + punctuation //< Any other punctuation. }; /// A C++ token. @@ -74,16 +75,15 @@ namespace cppast std::vector tokens_; }; + /// Tokenizes a string. + /// \effects Splits the string into C++ tokens. + /// The string must contain valid tokens and must already be preprocessed (i.e. translation phase 6 is already done). + /// \returns The tokenized string. + static cpp_token_string tokenize(std::string str); + /// \effects Creates it from a sequence of tokens. cpp_token_string(std::vector tokens) : tokens_(std::move(tokens)) {} - /// \effects Creates from a string. - /// \notes This does not do tokenization, it will only store a single, unknown token! - static cpp_token_string from_string(std::string str) - { - return cpp_token_string({cpp_token(cpp_token_kind::unknown, std::move(str))}); - } - /// \exclude target using iterator = std::vector::const_iterator; diff --git a/src/code_generator.cpp b/src/code_generator.cpp index 3837352..1546ea6 100644 --- a/src/code_generator.cpp +++ b/src/code_generator.cpp @@ -1158,7 +1158,7 @@ void detail::write_template_arguments( void detail::write_token_string(code_generator::output& output, const cpp_token_string& tokens) { - auto last_kind = cpp_token_kind::unknown; + auto last_kind = cpp_token_kind::punctuation; // neutral regarding whitespace for (auto& token : tokens) { switch (token.kind) @@ -1177,14 +1177,15 @@ void detail::write_token_string(code_generator::output& output, const cpp_token_ output << operator_ws; break; - case cpp_token_kind::literal: - // determine kind of literal - if (token.spelling.front() == '\"') - output << string_literal(token.spelling); - else if (token.spelling.find('.') != std::string::npos) - output << float_literal(token.spelling); - else - output << int_literal(token.spelling); + case cpp_token_kind::int_literal: + output << int_literal(token.spelling); + break; + case cpp_token_kind::float_literal: + output << float_literal(token.spelling); + break; + case cpp_token_kind::char_literal: + case cpp_token_kind::string_literal: + output << string_literal(token.spelling); break; case cpp_token_kind::punctuation: @@ -1206,9 +1207,6 @@ void detail::write_token_string(code_generator::output& output, const cpp_token_ else output << punctuation(token.spelling); break; - - case cpp_token_kind::unknown: - output << token_seq(token.spelling); } last_kind = token.kind; diff --git a/src/cpp_token.cpp b/src/cpp_token.cpp index 3d68d0c..04d9ec5 100644 --- a/src/cpp_token.cpp +++ b/src/cpp_token.cpp @@ -6,6 +6,9 @@ #include #include +#include +#include + #include using namespace cppast; @@ -16,6 +19,586 @@ void cpp_token_string::builder::unmunch() tokens_.back().spelling = ">"; } +namespace +{ + template + bool starts_with(const char* ptr, const char (&str)[N]) + { + return std::strncmp(ptr, str, N - 1u) == 0; + } + + bool starts_with(const char* ptr, const std::string& str) + { + return std::strncmp(ptr, str.c_str(), str.size()) == 0; + } + + template + bool bump_if(const char*& ptr, const char (&str)[N]) + { + if (starts_with(ptr, str)) + { + ptr += N - 1; + return true; + } + else + return false; + } + + bool bump_if(const char*& ptr, const std::string& str) + { + if (starts_with(ptr, str)) + { + ptr += str.size(); + return true; + } + else + return false; + } + + bool is_identifier_nondigit(char c) + { + // assume ASCII + if (c >= 'a' && c <= 'z') + return true; + else if (c >= 'A' && c <= 'Z') + return true; + else if (c == '_') + return true; + else + // technically \uXXX is allowed as well, but I haven't seen that used ever + return false; + } + + bool is_digit(char c) + { + return c >= '0' && c <= '9'; + } + + bool is_hexadecimal_digit(char c) + { + return is_digit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); + } + + type_safe::optional bump_identifier(const char*& ptr) + { + if (is_identifier_nondigit(*ptr)) + { + std::string result; + result += *ptr++; + + while (is_identifier_nondigit(*ptr) || is_digit(*ptr)) + result += *ptr++; + + return result; + } + else + return type_safe::nullopt; + } + + type_safe::optional identifier_token(const char*& ptr) + { + auto identifier = bump_identifier(ptr); + if (!identifier) + return type_safe::nullopt; + + static constexpr const char* keywords[] = {"alignas", + "alignof", + "asm", + "auto", + "bool", + "break", + "case", + "catch", + "char", + "char16_t", + "char32_t", + "class", + "const", + "constexpr", + "const_cast", + "continue", + "decltype", + "default", + "delete", + "do", + "double", + "dynamic_cast", + "else", + "enum", + "explicit", + "export", + "extern", + "false", + "float", + "for", + "friend", + "goto", + "if", + "inline", + "int", + "long", + "mutable", + "namespace", + "new", + "noexcept", + "nullptr", + "operator", + "private", + "protected", + "public", + "register", + "reinterpret_cast", + "return", + "short", + "signed", + "sizeof", + "static", + "static_assert", + "static_cast", + "struct", + "switch", + "template", + "this", + "thread_local", + "throw", + "true", + "try", + "typedef", + "typeid", + "typename", + "union", + "unsigned", + "using", + "virtual", + "void", + "volatile", + "wchar_t", + "while"}; + auto find_keyword = std::find(std::begin(keywords), std::end(keywords), identifier.value()); + if (find_keyword != std::end(keywords)) + return cpp_token(cpp_token_kind::keyword, identifier.value()); + else if (identifier == "and") + return cpp_token(cpp_token_kind::punctuation, "&&"); + else if (identifier == "and_eq") + return cpp_token(cpp_token_kind::punctuation, "&="); + else if (identifier == "bitand") + return cpp_token(cpp_token_kind::punctuation, "&"); + else if (identifier == "bitor") + return cpp_token(cpp_token_kind::punctuation, "|"); + else if (identifier == "compl") + return cpp_token(cpp_token_kind::punctuation, "~"); + else if (identifier == "not") + return cpp_token(cpp_token_kind::punctuation, "!"); + else if (identifier == "not_eq") + return cpp_token(cpp_token_kind::punctuation, "!="); + else if (identifier == "or") + return cpp_token(cpp_token_kind::punctuation, "||"); + else if (identifier == "or_eq") + return cpp_token(cpp_token_kind::punctuation, "|="); + else if (identifier == "xor") + return cpp_token(cpp_token_kind::punctuation, "^"); + else if (identifier == "xor_eq") + return cpp_token(cpp_token_kind::punctuation, "^="); + else + return cpp_token(cpp_token_kind::identifier, identifier.value()); + } + + void append_udl_suffix(std::string& literal, const char*& ptr) + { + if (auto id = identifier_token(ptr)) + literal += id.value().spelling; + } + + template + std::string parse_digit_sequence(const char*& ptr, DigitPredicate is_digit) + { + std::string result; + for (; is_digit(*ptr) || *ptr == '\''; ++ptr) + if (*ptr != '\'') + result += *ptr; + DEBUG_ASSERT(result.back() != '\'', detail::assert_handler{}); + return result; + } + + void append_integer_suffix(std::string& literal, const char*& ptr) + { + auto append_unsigned_suffix = [](std::string& literal, const char*& ptr) { + if (*ptr == 'u' || *ptr == 'U') + { + literal += *ptr++; + return true; + } + else + return false; + }; + auto append_long_suffix = [](std::string& literal, const char*& ptr) { + if (starts_with(ptr, "ll") || starts_with(ptr, "LL")) + { + literal += *ptr++; + literal += *ptr++; + return true; + } + else if (*ptr == 'l' || *ptr == 'L') + { + literal += *ptr++; + return true; + } + else + return false; + }; + + if (append_unsigned_suffix(literal, ptr)) + append_long_suffix(literal, ptr); + else if (append_long_suffix(literal, ptr)) + append_unsigned_suffix(literal, ptr); + else + append_udl_suffix(literal, ptr); + } + + void append_floating_point_suffix(std::string& literal, const char*& ptr) + { + if (*ptr == 'f' || *ptr == 'F') + literal += *ptr++; + else if (*ptr == 'l' || *ptr == 'L') + literal += *ptr++; + else + append_udl_suffix(literal, ptr); + } + + type_safe::optional parse_floating_point_exponent(const char*& ptr) + { + if (*ptr == 'e' || *ptr == 'E' || *ptr == 'p' || *ptr == 'P') + { + std::string result; + result += *ptr++; + if (*ptr == '+' || *ptr == '-') + result += *ptr++; + + result += parse_digit_sequence(ptr, &is_digit); + return result; + } + else + return type_safe::nullopt; + } + + type_safe::optional numeric_literal_token(const char*& ptr) + { + if (starts_with(ptr, "0b") || starts_with(ptr, "0B")) // binary integer literal + { + std::string result; + result += *ptr++; + result += *ptr++; + result += parse_digit_sequence(ptr, [](char c) { return c == '0' || c == '1'; }); + append_integer_suffix(result, ptr); + return cpp_token(cpp_token_kind::int_literal, result); + } + else if (starts_with(ptr, "0x") || starts_with(ptr, "0X")) // hexadecimal literal + { + std::string result; + result += *ptr++; + result += *ptr++; + result += parse_digit_sequence(ptr, &is_hexadecimal_digit); + + auto is_float = false; + if (*ptr == '.') + { + // floating point hexadecimal + is_float = true; + result += *ptr++; + result += parse_digit_sequence(ptr, &is_hexadecimal_digit); + } + + if (auto exp = parse_floating_point_exponent(ptr)) + { + is_float = true; + // floating point exponent + result += exp.value(); + } + + if (is_float) + append_floating_point_suffix(result, ptr); + else + append_integer_suffix(result, ptr); + + return cpp_token(is_float ? cpp_token_kind::float_literal : cpp_token_kind::int_literal, + result); + } + else if (is_digit(*ptr)) // octal and decimal literals + { + std::string result; + result += parse_digit_sequence(ptr, &is_digit); + + auto is_float = false; + if (*ptr == '.') + { + // floating point decimal + is_float = true; + result += *ptr++; + result += parse_digit_sequence(ptr, &is_hexadecimal_digit); + } + + if (auto exp = parse_floating_point_exponent(ptr)) + { + // floating point exponent + is_float = true; + result += exp.value(); + } + + if (is_float) + append_floating_point_suffix(result, ptr); + else + append_integer_suffix(result, ptr); + + return cpp_token(is_float ? cpp_token_kind::float_literal : cpp_token_kind::int_literal, + result); + } + else if (*ptr == '.' && is_digit(ptr[1])) + { + std::string result; + + // floating point fraction + result += *ptr++; + result += parse_digit_sequence(ptr, &is_digit); + + if (auto exp = parse_floating_point_exponent(ptr)) + result += exp.value(); + + append_floating_point_suffix(result, ptr); + return cpp_token(cpp_token_kind::float_literal, result); + } + else + return type_safe::nullopt; + } + + type_safe::optional parse_encoding_prefix(const char*& ptr) + { + if (bump_if(ptr, "u8")) + return "u8"; + else if (bump_if(ptr, "u")) + return "u"; + else if (bump_if(ptr, "U")) + return "U"; + else if (bump_if(ptr, "L")) + return "L"; + else + return type_safe::nullopt; + } + + type_safe::optional character_literal(const char*& ptr) + { + auto save = ptr; + auto prefix = parse_encoding_prefix(ptr); + if (*ptr != '\'') + { + ptr = save; + return type_safe::nullopt; + } + else + { + auto result = prefix.value_or(""); + result += *ptr++; + + while (*ptr != '\'') + { + DEBUG_ASSERT(*ptr, detail::assert_handler{}); + + if (*ptr == '\\') + result += *ptr++; + result += *ptr++; + } + result += *ptr++; + + append_udl_suffix(result, ptr); + return cpp_token(cpp_token_kind::char_literal, result); + } + } + + type_safe::optional string_literal(const char*& ptr) + { + auto save = ptr; + auto prefix = parse_encoding_prefix(ptr); + if (starts_with(ptr, "R\"")) + { + // raw string literal + auto result = prefix.value_or(""); + result += *ptr++; + result += *ptr++; + + std::string terminator; + terminator += ")"; + while (*ptr != '(') + { + result += *ptr; + terminator += *ptr++; + } + result += *ptr++; + terminator += '"'; + + while (!bump_if(ptr, terminator)) + { + DEBUG_ASSERT(ptr, detail::assert_handler{}); + result += *ptr++; + } + result += terminator; + + append_udl_suffix(result, ptr); + return cpp_token(cpp_token_kind::string_literal, result); + } + else if (starts_with(ptr, "\"")) + { + // regular string literal + auto result = prefix.value_or(""); + result += *ptr++; + + while (*ptr != '"') + { + DEBUG_ASSERT(*ptr, detail::assert_handler{}); + + if (*ptr == '\\') + result += *ptr++; + result += *ptr++; + } + result += *ptr++; + + append_udl_suffix(result, ptr); + return cpp_token(cpp_token_kind::string_literal, result); + } + else + { + ptr = save; + return type_safe::nullopt; + } + } + + type_safe::optional digraph_token(const char*& ptr) + { + if (bump_if(ptr, "<%")) + return cpp_token(cpp_token_kind::punctuation, "{"); + else if (bump_if(ptr, "%>")) + return cpp_token(cpp_token_kind::punctuation, "}"); + else if (starts_with(ptr, "<::") && ptr[3] != ':' && ptr[3] != '>') + // don't detect digraph in std::vector<::std::string> + return type_safe::nullopt; + else if (bump_if(ptr, "<:")) + return cpp_token(cpp_token_kind::punctuation, "["); + else if (bump_if(ptr, ":>")) + return cpp_token(cpp_token_kind::punctuation, "]"); + else if (bump_if(ptr, "%:%:")) + return cpp_token(cpp_token_kind::punctuation, "##"); + else if (bump_if(ptr, "%:")) + return cpp_token(cpp_token_kind::punctuation, "#"); + else + return type_safe::nullopt; + } + + type_safe::optional punctuation_token(const char*& ptr) + { + static constexpr const char* punctuations[] = { + // tokens staring with # + "##", + "#", + // tokens starting with . + "...", + ".*", + ".", + // tokens starting with : + "::", + ":", + // tokens starting with + + "+=", + "++", + "+", + // tokens starting with - + "->*", + "->", + "--", + "-=", + "-", + // tokens starting with * + "*=", + "*", + // tokens starting with / + "/=", + "/", + // tokens starting with % + "%=", + "%", + // tokens starting with ^ + "^=", + "^", + // tokens starting with & + "&=", + "&&", + "&", + // tokens starting with | + "|=", + "||", + "|", + // tokens starting with < + "<<=", + "<<", + "<=", + "<", + // tokens starting with > + ">>=", + ">>", + ">=", + ">", + // tokens starting with ! + "!=", + "!", + // tokens starting with = + "==", + "=", + // single tokens + "~", + ";", + "?", + ",", + "{", + "}", + "[", + "]", + "(", + ")", + }; + + for (auto punct : punctuations) + if (bump_if(ptr, punct)) + return cpp_token(cpp_token_kind::punctuation, punct); + + return type_safe::nullopt; + } +} + +cpp_token_string cpp_token_string::tokenize(std::string str) +{ + cpp_token_string::builder builder; + + auto ptr = str.c_str(); + while (*ptr) + { + if (auto num = numeric_literal_token(ptr)) + builder.add_token(num.value()); + else if (auto char_lit = character_literal(ptr)) + builder.add_token(char_lit.value()); + else if (auto str_lit = string_literal(ptr)) + builder.add_token(str_lit.value()); + else if (auto digraphs = digraph_token(ptr)) + builder.add_token(digraphs.value()); + else if (auto punct = punctuation_token(ptr)) + builder.add_token(punct.value()); + else if (auto id = identifier_token(ptr)) + builder.add_token(id.value()); + else if (*ptr == ' ' || *ptr == '\t' || *ptr == '\n' || *ptr == '\r') + ++ptr; + else + DEBUG_UNREACHABLE(detail::assert_handler{}); + } + + return builder.finish(); +} + namespace { bool is_identifier(char c) diff --git a/src/libclang/cxtokenizer.cpp b/src/libclang/cxtokenizer.cpp index 51c3a87..cc0c6de 100644 --- a/src/libclang/cxtokenizer.cpp +++ b/src/libclang/cxtokenizer.cpp @@ -412,9 +412,9 @@ bool detail::skip_attribute(detail::cxtoken_stream& stream) namespace { - cpp_token_kind get_kind(CXTokenKind kind) + cpp_token_kind get_kind(const detail::cxtoken& token) { - switch (kind) + switch (token.kind()) { case CXToken_Punctuation: return cpp_token_kind::punctuation; @@ -422,14 +422,26 @@ namespace return cpp_token_kind::keyword; case CXToken_Identifier: return cpp_token_kind::identifier; + case CXToken_Literal: - return cpp_token_kind::literal; + { + auto spelling = token.value().std_str(); + if (spelling.find('.') != std::string::npos) + return cpp_token_kind::float_literal; + else if (std::isdigit(spelling.front())) + return cpp_token_kind::int_literal; + else if (spelling.back() == '\'') + return cpp_token_kind::char_literal; + else + return cpp_token_kind::string_literal; + } + case CXToken_Comment: break; } DEBUG_UNREACHABLE(detail::assert_handler{}); - return cpp_token_kind ::literal; + return cpp_token_kind::punctuation; } } @@ -440,7 +452,7 @@ cpp_token_string detail::to_string(cxtoken_stream& stream, cxtoken_iterator end) while (stream.cur() != end) { auto& token = stream.get(); - builder.add_token(cpp_token(get_kind(token.kind()), token.c_str())); + builder.add_token(cpp_token(get_kind(token), token.c_str())); } if (stream.unmunch()) diff --git a/src/libclang/type_parser.cpp b/src/libclang/type_parser.cpp index c850e8a..5e0dd62 100644 --- a/src/libclang/type_parser.cpp +++ b/src/libclang/type_parser.cpp @@ -246,7 +246,7 @@ namespace return size_expr.empty() ? nullptr : cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_ulonglong), - cpp_token_string::from_string( + cpp_token_string::tokenize( std::string(size_expr.rbegin(), size_expr.rend()))); } @@ -488,7 +488,7 @@ namespace return cpp_decltype_type::build( cpp_unexposed_expression::build(cpp_unexposed_type::build(""), - cpp_token_string::from_string(spelling))); + cpp_token_string::tokenize(spelling))); }); } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 172e1cb..85282a6 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -24,6 +24,7 @@ set(tests cpp_preprocessor.cpp cpp_static_assert.cpp cpp_template_parameter.cpp + cpp_token.cpp cpp_type_alias.cpp cpp_variable.cpp integration.cpp diff --git a/test/cpp_function.cpp b/test/cpp_function.cpp index 1208093..53cf6ed 100644 --- a/test/cpp_function.cpp +++ b/test/cpp_function.cpp @@ -104,7 +104,7 @@ void ns::l() *cpp_unexposed_expression:: build(cpp_pointer_type::build( cpp_builtin_type::build(cpp_float)), - cpp_token_string::from_string("nullptr")))); + cpp_token_string::tokenize("nullptr")))); } else REQUIRE(false); @@ -135,7 +135,7 @@ void ns::l() *cpp_decltype_type::build( cpp_unexposed_expression:: build(cpp_builtin_type::build(cpp_int), - cpp_token_string::from_string("42"))))); + cpp_token_string::tokenize("42"))))); REQUIRE(!param.default_value()); } else @@ -162,16 +162,17 @@ void ns::l() equal_expressions(func.noexcept_condition().value(), *cpp_literal_expression::build(std::move(bool_t), "true"))); else if (func.name() == "e") - REQUIRE(equal_expressions(func.noexcept_condition().value(), - *cpp_unexposed_expression:: - build(std::move(bool_t), - cpp_token_string::from_string("false")))); + REQUIRE( + equal_expressions(func.noexcept_condition().value(), + *cpp_unexposed_expression::build(std::move(bool_t), + cpp_token_string::tokenize( + "false")))); else if (func.name() == "f") REQUIRE( equal_expressions(func.noexcept_condition().value(), - *cpp_unexposed_expression:: - build(std::move(bool_t), - cpp_token_string::from_string("noexcept(d())")))); + *cpp_unexposed_expression::build(std::move(bool_t), + cpp_token_string::tokenize( + "noexcept(d())")))); } else if (func.name() == "g" || func.name() == "h" || func.name() == "i" || func.name() == "j") diff --git a/test/cpp_member_function.cpp b/test/cpp_member_function.cpp index 6165a94..8cf9708 100644 --- a/test/cpp_member_function.cpp +++ b/test/cpp_member_function.cpp @@ -402,12 +402,11 @@ d::~d() {} REQUIRE(!dtor.is_virtual()); REQUIRE(dtor.body_kind() == cpp_function_definition); REQUIRE(dtor.noexcept_condition()); - REQUIRE( - equal_expressions(dtor.noexcept_condition().value(), - *cpp_unexposed_expression::build(cpp_builtin_type::build( - cpp_bool), - cpp_token_string::from_string( - "false")))); + REQUIRE(equal_expressions(dtor.noexcept_condition().value(), + *cpp_unexposed_expression::build(cpp_builtin_type::build( + cpp_bool), + cpp_token_string::tokenize( + "false")))); } else if (dtor.name() == "~c") { diff --git a/test/cpp_member_variable.cpp b/test/cpp_member_variable.cpp index 52db1d5..a8f339e 100644 --- a/test/cpp_member_variable.cpp +++ b/test/cpp_member_variable.cpp @@ -39,7 +39,7 @@ struct foo // all initializers are unexposed auto def = cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_float), - cpp_token_string::from_string("3.14f")); + cpp_token_string::tokenize("3.14f")); REQUIRE(var.default_value()); REQUIRE(equal_expressions(var.default_value().value(), *def)); diff --git a/test/cpp_static_assert.cpp b/test/cpp_static_assert.cpp index ceed7b4..1a9c38b 100644 --- a/test/cpp_static_assert.cpp +++ b/test/cpp_static_assert.cpp @@ -34,17 +34,15 @@ struct foo REQUIRE(equal_expressions(assert.expression(), *cpp_literal_expression::build(std::move(bool_t), "true"))); else if (assert.message() == "a") - REQUIRE( - equal_expressions(assert.expression(), - *cpp_unexposed_expression::build(std::move(bool_t), - cpp_token_string::from_string( - "true||false")))); + REQUIRE(equal_expressions(assert.expression(), + *cpp_unexposed_expression::build(std::move(bool_t), + cpp_token_string::tokenize( + "true||false")))); else if (assert.message() == "b") - REQUIRE( - equal_expressions(assert.expression(), - *cpp_unexposed_expression::build(std::move(bool_t), - cpp_token_string::from_string( - "!B")))); + REQUIRE(equal_expressions(assert.expression(), + *cpp_unexposed_expression::build(std::move(bool_t), + cpp_token_string::tokenize( + "!B")))); else REQUIRE(false); }); diff --git a/test/cpp_template_parameter.cpp b/test/cpp_template_parameter.cpp index 74d5f26..e0a5de6 100644 --- a/test/cpp_template_parameter.cpp +++ b/test/cpp_template_parameter.cpp @@ -151,13 +151,13 @@ using d = void; cpp_builtin_type::build(cpp_char)))); REQUIRE(!param.is_variadic()); REQUIRE(param.default_value()); - REQUIRE(equal_expressions(param.default_value().value(), - *cpp_unexposed_expression:: - build(cpp_builtin_type::build( - cpp_nullptr), - cpp_token_string:: - from_string( - "nullptr")))); + REQUIRE( + equal_expressions(param.default_value().value(), + *cpp_unexposed_expression:: + build(cpp_builtin_type::build( + cpp_nullptr), + cpp_token_string::tokenize( + "nullptr")))); } else if (param.name() == "C") { diff --git a/test/cpp_token.cpp b/test/cpp_token.cpp new file mode 100644 index 0000000..2996e72 --- /dev/null +++ b/test/cpp_token.cpp @@ -0,0 +1,129 @@ +// Copyright (C) 2017 Jonathan Müller +// This file is subject to the license terms in the LICENSE file +// found in the top-level directory of this distribution. + +#include + +#include + +#include +#include + +using namespace cppast; + +void check_equal_tokens(const std::string& str, std::initializer_list tokens) +{ + auto token_str = cpp_token_string::tokenize(str); + INFO(str); + REQUIRE(token_str.end() - token_str.begin() == tokens.size()); + REQUIRE(std::equal(token_str.begin(), token_str.end(), tokens.begin())); +} + +TEST_CASE("tokenizer") +{ + SECTION("integer literals") + { + check_equal_tokens(" 1234 ", {cpp_token(cpp_token_kind::int_literal, "1234")}); + check_equal_tokens("1, 2", {cpp_token(cpp_token_kind::int_literal, "1"), + cpp_token(cpp_token_kind::punctuation, ","), + cpp_token(cpp_token_kind::int_literal, "2")}); + + // integer suffixes + check_equal_tokens("1234ul", {cpp_token(cpp_token_kind::int_literal, "1234ul")}); + check_equal_tokens("12'34LU", {cpp_token(cpp_token_kind::int_literal, "1234LU")}); + + // other integer formats + check_equal_tokens("01234", {cpp_token(cpp_token_kind::int_literal, "01234")}); + check_equal_tokens("0x1234AF", {cpp_token(cpp_token_kind::int_literal, "0x1234AF")}); + check_equal_tokens("0b101101", {cpp_token(cpp_token_kind::int_literal, "0b101101")}); + } + SECTION("floating point literals") + { + // floating point suffixes + check_equal_tokens("3.14", {cpp_token(cpp_token_kind::float_literal, "3.14")}); + check_equal_tokens("3.14f", {cpp_token(cpp_token_kind::float_literal, "3.14f")}); + check_equal_tokens("3.14L", {cpp_token(cpp_token_kind::float_literal, "3.14L")}); + + // missing parts + check_equal_tokens(".5", {cpp_token(cpp_token_kind::float_literal, ".5")}); + check_equal_tokens("1.", {cpp_token(cpp_token_kind::float_literal, "1.")}); + + // exponents + check_equal_tokens("1.0e4", {cpp_token(cpp_token_kind::float_literal, "1.0e4")}); + check_equal_tokens("1e4", {cpp_token(cpp_token_kind::float_literal, "1e4")}); + check_equal_tokens(".5e-2", {cpp_token(cpp_token_kind::float_literal, ".5e-2")}); + + // hexadecimal + check_equal_tokens("0xabc.def", {cpp_token(cpp_token_kind::float_literal, "0xabc.def")}); + check_equal_tokens("0x123p42", {cpp_token(cpp_token_kind::float_literal, "0x123p42")}); + } + SECTION("character literals") + { + check_equal_tokens(R"('a')", {cpp_token(cpp_token_kind::char_literal, R"('a')")}); + check_equal_tokens(R"(u8'a')", {cpp_token(cpp_token_kind::char_literal, R"(u8'a')")}); + check_equal_tokens(R"(U'a')", {cpp_token(cpp_token_kind::char_literal, R"(U'a')")}); + check_equal_tokens(R"('\'')", {cpp_token(cpp_token_kind::char_literal, R"('\'')")}); + } + SECTION("string literals") + { + check_equal_tokens(R"("hello")", {cpp_token(cpp_token_kind::string_literal, R"("hello")")}); + check_equal_tokens(R"(u8"he\"llo")", + {cpp_token(cpp_token_kind::string_literal, R"(u8"he\"llo")")}); + + check_equal_tokens(R"*(R"(hel\"lo)")*", + {cpp_token(cpp_token_kind::string_literal, R"*(R"(hel\"lo)")*")}); + check_equal_tokens(R"**(R"*(hello R"(foo)")*")**", + {cpp_token(cpp_token_kind::string_literal, + R"**(R"*(hello R"(foo)")*")**")}); + } + SECTION("UDLs") + { + check_equal_tokens("123_foo", {cpp_token(cpp_token_kind::int_literal, "123_foo")}); + check_equal_tokens("123.456_foo", + {cpp_token(cpp_token_kind::float_literal, "123.456_foo")}); + check_equal_tokens(R"("hi"_foo)", + {cpp_token(cpp_token_kind::string_literal, R"("hi"_foo)")}); + } + SECTION("identifiers") + { + check_equal_tokens("foo bar baz_a", {cpp_token(cpp_token_kind::identifier, "foo"), + cpp_token(cpp_token_kind::identifier, "bar"), + cpp_token(cpp_token_kind::identifier, "baz_a")}); + check_equal_tokens("constant", {cpp_token(cpp_token_kind::identifier, "constant")}); + } + SECTION("keywords") + { + // just test some + check_equal_tokens("const float auto", {cpp_token(cpp_token_kind::keyword, "const"), + cpp_token(cpp_token_kind::keyword, "float"), + cpp_token(cpp_token_kind::keyword, "auto")}); + } + SECTION("punctuations") + { + // just test munch things + check_equal_tokens("<< <= <", {cpp_token(cpp_token_kind::punctuation, "<<"), + cpp_token(cpp_token_kind::punctuation, "<="), + cpp_token(cpp_token_kind::punctuation, "<")}); + check_equal_tokens("- -- -> ->*", {cpp_token(cpp_token_kind::punctuation, "-"), + cpp_token(cpp_token_kind::punctuation, "--"), + cpp_token(cpp_token_kind::punctuation, "->"), + cpp_token(cpp_token_kind::punctuation, "->*")}); + check_equal_tokens("--->>>>", {cpp_token(cpp_token_kind::punctuation, "--"), + cpp_token(cpp_token_kind::punctuation, "->"), + cpp_token(cpp_token_kind::punctuation, ">>"), + cpp_token(cpp_token_kind::punctuation, ">")}); + + // alternative spellings + check_equal_tokens("and not xor", {cpp_token(cpp_token_kind::punctuation, "&&"), + cpp_token(cpp_token_kind::punctuation, "!"), + cpp_token(cpp_token_kind::punctuation, "^")}); + + // digraphs + check_equal_tokens("<% foo<::bar>", {cpp_token(cpp_token_kind::punctuation, "{"), + cpp_token(cpp_token_kind::identifier, "foo"), + cpp_token(cpp_token_kind::punctuation, "<"), + cpp_token(cpp_token_kind::punctuation, "::"), + cpp_token(cpp_token_kind::identifier, "bar"), + cpp_token(cpp_token_kind::punctuation, ">")}); + } +} diff --git a/test/cpp_type_alias.cpp b/test/cpp_type_alias.cpp index 2454efd..27a0939 100644 --- a/test/cpp_type_alias.cpp +++ b/test/cpp_type_alias.cpp @@ -334,7 +334,7 @@ typedef decltype(0) w; return cpp_literal_expression::build(std::move(type), std::move(size)); else return cpp_unexposed_expression::build(std::move(type), - cpp_token_string::from_string(std::move(size))); + cpp_token_string::tokenize(std::move(size))); }; cpp_entity_index idx; @@ -507,7 +507,7 @@ typedef decltype(0) w; { auto type = cpp_decltype_type::build( cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int), - cpp_token_string::from_string("0"))); + cpp_token_string::tokenize("0"))); REQUIRE(equal_types(idx, alias.underlying_type(), *type)); } else diff --git a/test/cpp_variable.cpp b/test/cpp_variable.cpp index 83c6bd4..76e4905 100644 --- a/test/cpp_variable.cpp +++ b/test/cpp_variable.cpp @@ -101,14 +101,13 @@ int r[] = {0}; // unexposed due to implicit cast, I think type_safe::ref( *cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int), - cpp_token_string::from_string( - "42"))), + cpp_token_string::tokenize("42"))), cpp_storage_class_none, false, false); else if (var.name() == "c") check_variable(var, *cpp_builtin_type::build(cpp_float), type_safe::ref( *cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_float), - cpp_token_string::from_string( + cpp_token_string::tokenize( "3.f+0.14f"))), cpp_storage_class_none, false, false); else if (var.name() == "d") @@ -126,8 +125,7 @@ int r[] = {0}; cpp_cv_const), type_safe::ref( *cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int), - cpp_token_string::from_string( - "12"))), + cpp_token_string::tokenize("12"))), cpp_storage_class_none, true, false); else if (var.name() == "i") { @@ -147,7 +145,7 @@ int r[] = {0}; *cpp_unexposed_expression::build(cpp_user_defined_type::build( cpp_type_ref(cpp_entity_id(""), "bar")), - cpp_token_string::from_string( + cpp_token_string::tokenize( "bar()"))), cpp_storage_class_none, false, false); return false; @@ -169,8 +167,7 @@ int r[] = {0}; check_variable(var, *cpp_auto_type::build(), type_safe::ref( *cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int), - cpp_token_string::from_string( - "128"))), + cpp_token_string::tokenize("128"))), cpp_storage_class_none, false, false); else if (var.name() == "n") check_variable(var, @@ -180,14 +177,13 @@ int r[] = {0}; cpp_ref_lvalue), type_safe::ref( *cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int), - cpp_token_string::from_string( - "m"))), + cpp_token_string::tokenize("m"))), cpp_storage_class_none, false, false); else if (var.name() == "o") check_variable(var, *cpp_decltype_type::build( cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int), - cpp_token_string::from_string("0"))), + cpp_token_string::tokenize("0"))), nullptr, cpp_storage_class_none, false, false); else if (var.name() == "p") check_variable(var, @@ -196,13 +192,12 @@ int r[] = {0}; build(cpp_decltype_type::build( cpp_unexposed_expression:: build(cpp_builtin_type::build(cpp_int), - cpp_token_string::from_string("o"))), + cpp_token_string::tokenize("o"))), cpp_cv_const), cpp_ref_lvalue), type_safe::ref( *cpp_unexposed_expression::build(cpp_builtin_type::build(cpp_int), - cpp_token_string::from_string( - "o"))), + cpp_token_string::tokenize("o"))), cpp_storage_class_none, false, false); else if (var.name() == "q") check_variable(var, @@ -219,8 +214,7 @@ int r[] = {0}; "1")), type_safe::ref( *cpp_unexposed_expression::build(cpp_unexposed_type::build(""), - cpp_token_string::from_string( - "{0}"))), + cpp_token_string::tokenize("{0}"))), cpp_storage_class_none, false, false); else REQUIRE(false);